From 0acdd511555a5e1d906579dd20c6430a0df2e267 Mon Sep 17 00:00:00 2001 From: Yuanchun Shen Date: Fri, 26 Sep 2025 11:19:57 +0800 Subject: [PATCH 01/26] WIP: implementing case range analyzer Signed-off-by: Yuanchun Shen --- .../expression/function/PPLFuncImpTable.java | 12 +- .../remote/CalcitePPLCaseFunctionIT.java | 141 ++++++++++ .../opensearch/request/AggregateAnalyzer.java | 79 +++++- .../opensearch/request/CaseRangeAnalyzer.java | 247 ++++++++++++++++++ .../opensearch/response/agg/RangeParser.java | 60 +++++ .../sql/ppl/utils/PPLQueryDataAnonymizer.java | 2 +- .../ppl/utils/PPLQueryDataAnonymizerTest.java | 4 +- 7 files changed, 527 insertions(+), 18 deletions(-) create mode 100644 opensearch/src/main/java/org/opensearch/sql/opensearch/request/CaseRangeAnalyzer.java create mode 100644 opensearch/src/main/java/org/opensearch/sql/opensearch/response/agg/RangeParser.java diff --git a/core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java b/core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java index 8752a0ae822..1d9ba2bb3df 100644 --- a/core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java +++ b/core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java @@ -894,19 +894,15 @@ void populate() { XOR, SqlStdOperatorTable.NOT_EQUALS, PPLTypeChecker.family(SqlTypeFamily.BOOLEAN, SqlTypeFamily.BOOLEAN)); - // SqlStdOperatorTable.CASE.getOperandTypeChecker is null. We manually create a - // type checker - // for it. The second and third operands are required to be of the same type. If - // not, - // it will throw an IllegalArgumentException with information Can't find - // leastRestrictive type + // SqlStdOperatorTable.CASE.getOperandTypeChecker is null. We manually create a type checker + // for it. The second and third operands are required to be of the same type. 
If not, it will + // throw an IllegalArgumentException with information Can't find leastRestrictive type registerOperator( IF, SqlStdOperatorTable.CASE, PPLTypeChecker.family(SqlTypeFamily.BOOLEAN, SqlTypeFamily.ANY, SqlTypeFamily.ANY)); // Re-define the type checker for is not null, is present, and is null since - // their original - // type checker ANY isn't compatible with struct types. + // their original type checker ANY isn't compatible with struct types. registerOperator( IS_NOT_NULL, SqlStdOperatorTable.IS_NOT_NULL, diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLCaseFunctionIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLCaseFunctionIT.java index 7e4425d3a41..42dc35a29bf 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLCaseFunctionIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLCaseFunctionIT.java @@ -6,6 +6,8 @@ package org.opensearch.sql.calcite.remote; import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_WEBLOGS; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertTrue; import static org.opensearch.sql.util.MatcherUtils.rows; import static org.opensearch.sql.util.MatcherUtils.schema; import static org.opensearch.sql.util.MatcherUtils.verifyDataRows; @@ -246,4 +248,143 @@ public void testCaseWhenInSubquery() throws IOException { rows("0.0.0.2", "GET", null, "4085", "500", "/shuttle/missions/sts-73/mission-sts-73.html"), rows("::3", "GET", null, "3985", "403", "/shuttle/countdown/countdown.html")); } + + @Test + public void testCaseRangeAggregationPushdown() throws IOException { + // Test CASE expression that can be optimized to range aggregation + // Note: This has an implicit ELSE NULL, so it won't be optimized + // But it should still work correctly + JSONObject actual = + executeQuery( + String.format( + "source=%s | eval range_bucket = case(" + 
+ " cast(bytes as int) < 1000, 'small'," + + " cast(bytes as int) >= 1000 AND cast(bytes as int) < 5000, 'medium'," + + " cast(bytes as int) >= 5000, 'large'" + + ") | stats count() as total by range_bucket | sort range_bucket", + TEST_INDEX_WEBLOGS)); + + verifySchema( + actual, + schema("range_bucket", "string"), + schema("total", "long")); + + // This should work but won't be optimized due to implicit NULL bucket + assertTrue(actual.getJSONArray("datarows").length() > 0); + } + + @Test + public void testCaseRangeAggregationWithMetrics() throws IOException { + // Test CASE-to-range with additional aggregations + JSONObject actual = + executeQuery( + String.format( + "source=%s | eval size_category = case(" + + " cast(bytes as int) < 2000, 'small'," + + " cast(bytes as int) >= 2000 AND cast(bytes as int) < 5000, 'medium'," + + " cast(bytes as int) >= 5000, 'large'" + + ") | stats count() as total, avg(cast(bytes as int)) as avg_bytes by size_category" + + " | sort size_category", + TEST_INDEX_WEBLOGS)); + + verifySchema( + actual, + schema("size_category", "string"), + schema("total", "long"), + schema("avg_bytes", "double")); + + // Verify we get results for each category + // The exact values may vary based on test data, but structure should be correct + assertEquals(3, actual.getJSONArray("datarows").length()); + } + + @Test + public void testCaseRangeAggregationWithElse() throws IOException { + // Test CASE with explicit ELSE clause + JSONObject actual = + executeQuery( + String.format( + "source=%s | eval status_category = case(" + + " cast(response as int) < 300, 'success'," + + " cast(response as int) >= 300 AND cast(response as int) < 400, 'redirect'," + + " cast(response as int) >= 400 AND cast(response as int) < 500, 'client_error'," + + " cast(response as int) >= 500, 'server_error'" + + " else 'unknown'" + + ") | stats count() by status_category | sort status_category", + TEST_INDEX_WEBLOGS)); + + verifySchema( + actual, + schema("status_category", 
"string"), + schema("count()", "long")); + + // Should handle the ELSE case for null/non-numeric responses + assertTrue(actual.getJSONArray("datarows").length() > 0); + } + + @Test + public void testNonOptimizableCaseExpression() throws IOException { + // Test CASE that cannot be optimized (different fields) + JSONObject actual = + executeQuery( + String.format( + "source=%s | eval mixed_condition = case(" + + " cast(bytes as int) < 1000, 'small_bytes'," + + " cast(response as int) >= 400, 'error_response'" + + " else 'other'" + + ") | stats count() by mixed_condition", + TEST_INDEX_WEBLOGS)); + + verifySchema( + actual, + schema("mixed_condition", "string"), + schema("count()", "long")); + + // This should work but won't be optimized + assertTrue(actual.getJSONArray("datarows").length() > 0); + } + + @Test + public void testCaseWithNonLiteralResult() throws IOException { + // Test CASE that cannot be optimized (non-literal results) + JSONObject actual = + executeQuery( + String.format( + "source=%s | eval computed_result = case(" + + " cast(bytes as int) < 1000, concat('small_', host)," + + " cast(bytes as int) >= 1000, concat('large_', host)" + + ") | stats count() by computed_result | head 3", + TEST_INDEX_WEBLOGS)); + + verifySchema( + actual, + schema("computed_result", "string"), + schema("count()", "long")); + + // This should work but won't be optimized to range aggregation + assertTrue(actual.getJSONArray("datarows").length() > 0); + } + + @Test + public void testOptimizableCaseRangeAggregation() throws IOException { + // Test CASE that could be optimized if all ranges are covered with explicit ELSE + JSONObject actual = + executeQuery( + String.format( + "source=%s | eval size_bucket = case(" + + " cast(bytes as int) < 2000, 'small'," + + " cast(bytes as int) >= 2000 AND cast(bytes as int) < 5000, 'medium'," + + " cast(bytes as int) >= 5000, 'large'" + + " else 'unknown'" + + ") | stats count() by size_bucket | sort size_bucket", + TEST_INDEX_WEBLOGS)); + 
+ verifySchema( + actual, + schema("size_bucket", "string"), + schema("count()", "long")); + + // This should work - the explicit ELSE makes it potentially optimizable + assertTrue(actual.getJSONArray("datarows").length() > 0); + } } diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/request/AggregateAnalyzer.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/request/AggregateAnalyzer.java index 9c08779532a..65355a809b1 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/request/AggregateAnalyzer.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/request/AggregateAnalyzer.java @@ -38,7 +38,10 @@ import java.util.Collections; import java.util.List; import java.util.Map; +import java.util.Set; import java.util.function.Function; +import java.util.stream.Collectors; + import lombok.RequiredArgsConstructor; import org.apache.calcite.plan.RelOptCluster; import org.apache.calcite.rel.core.Aggregate; @@ -57,6 +60,7 @@ import org.opensearch.search.aggregations.AggregatorFactories; import org.opensearch.search.aggregations.AggregatorFactories.Builder; import org.opensearch.search.aggregations.BucketOrder; +import org.opensearch.search.aggregations.bucket.range.RangeAggregationBuilder; import org.opensearch.search.aggregations.bucket.composite.CompositeValuesSourceBuilder; import org.opensearch.search.aggregations.bucket.composite.TermsValuesSourceBuilder; import org.opensearch.search.aggregations.bucket.missing.MissingOrder; @@ -76,10 +80,12 @@ import org.opensearch.sql.opensearch.data.type.OpenSearchDataType; import org.opensearch.sql.opensearch.request.PredicateAnalyzer.NamedFieldExpression; import org.opensearch.sql.opensearch.response.agg.ArgMaxMinParser; +import org.opensearch.sql.opensearch.response.agg.BucketAggregationParser; import org.opensearch.sql.opensearch.response.agg.CompositeAggregationParser; import org.opensearch.sql.opensearch.response.agg.MetricParser; import 
org.opensearch.sql.opensearch.response.agg.NoBucketAggregationParser; import org.opensearch.sql.opensearch.response.agg.OpenSearchAggregationResponseParser; +import org.opensearch.sql.opensearch.response.agg.RangeParser; import org.opensearch.sql.opensearch.response.agg.SinglePercentileParser; import org.opensearch.sql.opensearch.response.agg.SingleValueParser; import org.opensearch.sql.opensearch.response.agg.StatsParser; @@ -196,21 +202,80 @@ public static Pair, OpenSearchAggregationResponseParser Pair> builderAndParser = processAggregateCalls(aggFieldNames, aggregate.getAggCallList(), project, helper); Builder metricBuilder = builderAndParser.getLeft(); - List metricParserList = builderAndParser.getRight(); + List metricParsers = builderAndParser.getRight(); + // Find group by fields derived from CASE functions and convert them to range queries + CaseRangeAnalyzer rangeAnalyzer = CaseRangeAnalyzer.create(rowType); + List> groupsByCase = groupList.stream() + .map(i -> Pair.of(i, project.getProjects().get(i))) + .filter(p -> p.getRight() instanceof RexCall rexCall && rexCall.getKind() == SqlKind.CASE) + .map(p -> Pair.of(p.getLeft(), rangeAnalyzer.analyze((RexCall) p.getRight()))) + .filter(p -> p.getRight().isPresent()) + .map(p -> Pair.of(p.getLeft(), p.getRight().get())) + .toList(); + + // Cascade aggregations in such a way: + // RangeAggregation + // ...Any other range aggregations + // Metric Aggregation comes at last + // Note that but a composite aggregation can not be a sub aggregation of range aggregation, + // but range aggregation can be a sub aggregation of a composite aggregation. 
+ AggregationBuilder rangeAggregationBuilder = null; + if (!groupsByCase.isEmpty()) { + for (int i = 0; i < groupsByCase.size(); i++) { + Pair pair = groupsByCase.get(i); + if (i == 0) { + rangeAggregationBuilder = pair.getRight(); + } else { + groupsByCase.get(i - 1).getRight().subAggregation(pair.getRight()); + } + } + groupsByCase.getLast().getRight().subAggregations(metricBuilder); + metricParsers.add(new RangeParser("case_range")); + } + + // Remove groups that are converted to ranges from groupList + Set toRemove = groupsByCase.stream().map(Pair::getLeft).collect(Collectors.toSet()); + List filteredGroupList = groupList.stream().filter(i -> !toRemove.contains(i)).toList(); - if (aggregate.getGroupSet().isEmpty()) { + // The top-level query is a range query: stats count() by range_field + // RangeAgg + // Metric + if (!groupsByCase.isEmpty() && filteredGroupList.isEmpty()) { + return Pair.of(List.of(rangeAggregationBuilder), new BucketAggregationParser(metricParsers)); + } + // No parent composite aggregation or range aggregation is attached: stats count() + // Metric + else if (aggregate.getGroupSet().isEmpty() && filteredGroupList.isEmpty()) { return Pair.of( ImmutableList.copyOf(metricBuilder.getAggregatorFactories()), - new NoBucketAggregationParser(metricParserList)); - } else { + new NoBucketAggregationParser(metricParsers)); + } + // It has both composite aggregation and range aggregation: stats count() by range_field, non_range_field + // CompositeAgg + // RangeAgg + // Metric + else if (!groupsByCase.isEmpty()) { List> buckets = - createCompositeBuckets(groupList, project, helper); + createCompositeBuckets(filteredGroupList, project, helper); + return Pair.of( + Collections.singletonList( + AggregationBuilders.composite("composite_buckets", buckets) + .subAggregation(rangeAggregationBuilder) + .size(AGGREGATION_BUCKET_SIZE)), + new CompositeAggregationParser(metricParsers)); + } + // It does not have range aggregation, but has composite aggregation: 
stats count() by non_range_field + // CompositeAgg + // Metric + else { + List> buckets = + createCompositeBuckets(filteredGroupList, project, helper); return Pair.of( Collections.singletonList( AggregationBuilders.composite("composite_buckets", buckets) .subAggregations(metricBuilder) .size(AGGREGATION_BUCKET_SIZE)), - new CompositeAggregationParser(metricParserList)); + new CompositeAggregationParser(metricParsers)); } } catch (Throwable e) { Throwables.throwIfInstanceOf(e, UnsupportedOperationException.class); @@ -462,7 +527,7 @@ private static ValuesSourceAggregationBuilder createBucket( private static CompositeValuesSourceBuilder createCompositeBucket( Integer groupIndex, Project project, AggregateBuilderHelper helper) { RexNode rex = project.getProjects().get(groupIndex); - String bucketName = project.getRowType().getFieldList().get(groupIndex).getName(); + String bucketName = project.getRowType().getFieldNames().get(groupIndex); if (rex instanceof RexCall rexCall && rexCall.getKind() == SqlKind.OTHER_FUNCTION && rexCall.getOperator().getName().equalsIgnoreCase(BuiltinFunctionName.SPAN.name()) diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/request/CaseRangeAnalyzer.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/request/CaseRangeAnalyzer.java new file mode 100644 index 00000000000..3a7e75d90f5 --- /dev/null +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/request/CaseRangeAnalyzer.java @@ -0,0 +1,247 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.opensearch.request; + +import java.math.BigDecimal; +import java.util.List; +import java.util.Objects; +import java.util.Optional; + +import com.google.common.collect.BoundType; +import com.google.common.collect.Range; +import com.google.common.collect.RangeSet; +import com.google.common.collect.TreeRangeSet; +import lombok.RequiredArgsConstructor; +import org.apache.calcite.rel.type.RelDataType; 
+import org.apache.calcite.rex.RexCall; +import org.apache.calcite.rex.RexInputRef; +import org.apache.calcite.rex.RexLiteral; +import org.apache.calcite.rex.RexNode; +import org.apache.calcite.sql.SqlBinaryOperator; +import org.apache.calcite.sql.SqlKind; +import org.apache.calcite.sql.SqlOperator; +import org.apache.calcite.util.Sarg; +import org.opensearch.search.aggregations.AggregationBuilders; +import org.opensearch.search.aggregations.bucket.range.RangeAggregationBuilder; +import org.opensearch.search.aggregations.bucket.range.RangeAggregator; + +/** + * Analyzer to detect CASE expressions that can be converted to OpenSearch range aggregations. + * + *

Strict validation rules: + *

    + *
  • All conditions must compare the same field with literals
  • + *
  • Only simple comparison operators (>, >=, <, <=) are allowed
  • + *
  • Ranges must be non-overlapping and contiguous
  • + *
  • Return values must be string literals
  • + *
+ */ +public class CaseRangeAnalyzer { + private final RelDataType rowType; + private final RangeSet rangeSet; + + public CaseRangeAnalyzer(RelDataType rowType) { + this.rowType = rowType; + this.rangeSet = TreeRangeSet.create(); + } + + /** + * Creates a new CaseRangeAnalyzer instance. + * + * @param rowType the row type information for field resolution + * @return a new CaseRangeAnalyzer instance + */ + public static CaseRangeAnalyzer create(RelDataType rowType) { + return new CaseRangeAnalyzer(rowType); + } + + /** + * Analyzes a CASE expression to determine if it can be converted to a range aggregation. + * + * @param caseCall The CASE RexCall to analyze + * @return Optional RangeAggregationBuilder if conversion is possible, empty otherwise + */ + public Optional analyze(RexCall caseCall) { + if (!caseCall.getKind().equals(SqlKind.CASE)) { + return Optional.empty(); + } + + List operands = caseCall.getOperands(); + RangeAggregationBuilder aggregationBuilder = AggregationBuilders.range("case_range"); + + // Process WHEN-THEN pairs + for (int i = 0; i < operands.size() - 1; i += 2) { + RexNode condition = operands.get(i); + RexNode result = operands.get(i + 1); + // Result must be a literal + if (!(result instanceof RexLiteral)) { + return Optional.empty(); + } + String key = ((RexLiteral) result).getValueAs(String.class); + analyzeCondition(aggregationBuilder, condition, key); + } + + // Check ELSE clause + // TODO: Currently, we ignore else clause + // Process the case without else clause and check range completeness later + return Optional.of(aggregationBuilder); + } + + /** Analyzes a single condition in the CASE WHEN clause. 
*/ + private void analyzeCondition(RangeAggregationBuilder builder, RexNode condition, String key) { + if (!(condition instanceof RexCall)) { + throwUnsupported("condition must be a RexCall"); + } + + RexCall call = (RexCall) condition; + SqlKind kind = call.getKind(); + + // Handle simple comparisons + if (kind == SqlKind.GREATER_THAN_OR_EQUAL || kind == SqlKind.LESS_THAN || kind == SqlKind.LESS_THAN_OR_EQUAL || kind == SqlKind.GREATER_THAN) { + builder.addRange(analyzeSimpleComparison(builder, call, key)); + } + // Handle AND conditions (for range conditions like x >= 10 AND x < 100) + else if (kind == SqlKind.AND || kind == SqlKind.OR) { + analyzeCompositeCondition(builder, call, key); + } else if (kind == SqlKind.SEARCH) { + analyzeSearchCondition(builder, call, key); + } + } + + private RangeAggregator.Range analyzeSimpleComparison(RangeAggregationBuilder builder, RexCall call, String key) { + List operands = call.getOperands(); + if (operands.size() != 2 || !(call.getOperator() instanceof SqlBinaryOperator)) { + throwUnsupported(); + } + RexNode left = operands.get(0); + RexNode right = operands.get(1); + SqlOperator operator = call.getOperator(); + RexInputRef inputRef = null; + RexLiteral literal = null; + + if (left instanceof RexInputRef && right instanceof RexLiteral) { + inputRef = (RexInputRef) left; + literal = (RexLiteral) right; + } else if (left instanceof RexLiteral && right instanceof RexInputRef) { + inputRef = (RexInputRef) right; + literal = (RexLiteral) left; + operator = operator.reverse(); + } else { + throwUnsupported(); + } + + if (operator == null) { + throwUnsupported(); + } + + String fieldName = rowType.getFieldNames().get(inputRef.getIndex()); + if (builder.field() == null) { + builder.field(fieldName); + } else if (!Objects.equals(builder.field(), fieldName)) { + throwUnsupported("comparison must be performed on the same field"); + } + + Double value = literal.getValueAs(Double.class); + return switch (operator.getKind()) { + case 
GREATER_THAN_OR_EQUAL -> new RangeAggregator.Range(key, value, null); + case LESS_THAN -> new RangeAggregator.Range(key, null, value); + default -> throw new UnsupportedOperationException("ranges must equivalents of field >= constant or field < constant"); + }; + } + + private void analyzeCompositeCondition(RangeAggregationBuilder builder, RexCall compositeCall, String key) { + RexNode left = compositeCall.getOperands().get(0); + RexNode right = compositeCall.getOperands().get(1); + + if (!(left instanceof RexCall && right instanceof RexCall && ((RexCall) left).getOperator() instanceof SqlBinaryOperator && ((RexCall) right).getOperator() instanceof SqlBinaryOperator)) { + throwUnsupported("cannot analyze deep nested comparison"); + } + + // For AND conditions, we need to analyze them separately and combine + // Create temporary ranges to analyze the conditions + RangeAggregator.Range leftRange = analyzeSimpleComparison(builder, (RexCall) left, key); + RangeAggregator.Range rightRange = analyzeSimpleComparison(builder, (RexCall) right, key); + + // Combine into single range + if (compositeCall.getKind() == SqlKind.AND) { + and(builder, leftRange, rightRange, key); + } else if (compositeCall.getKind() == SqlKind.OR) { + or(builder, leftRange, rightRange, key); + } + } + + private void analyzeSearchCondition(RangeAggregationBuilder builder, RexCall searchCall, String key) { + RexNode field = searchCall.getOperands().getFirst(); + if (!(field instanceof RexInputRef) || !Objects.equals(getFieldName((RexInputRef) field), builder.field())) { + throwUnsupported("Range query must be performed on the same field"); + } + RexLiteral literal = (RexLiteral) searchCall.getOperands().getLast(); + Sarg sarg = literal.getValueAs(Sarg.class); + for(Object r: sarg.rangeSet.asRanges()){ + Range range = (Range) r; + if ((range.hasLowerBound() && range.lowerBoundType() != BoundType.CLOSED) || (range.hasUpperBound() && range.upperBoundType() != BoundType.OPEN)){ + throwUnsupported("Range 
query only supports closed-open ranges"); + } + if (!range.hasLowerBound() && range.hasUpperBound()) { + builder.addUnboundedTo(key, range.upperEndpoint().doubleValue()); + } else if (range.hasLowerBound() && !range.hasUpperBound()) { + builder.addUnboundedFrom(key, range.lowerEndpoint().doubleValue()); + } else if (range.hasLowerBound()) { + builder.addRange(key, range.lowerEndpoint().doubleValue(), range.upperEndpoint().doubleValue()); + } else { + builder.addRange(key, Double.NEGATIVE_INFINITY, Double.POSITIVE_INFINITY); + } + } + } + + private static void and( + RangeAggregationBuilder builder, RangeAggregator.Range left, RangeAggregator.Range right, String key) { + double mergedFrom = Math.max(left.getFrom(), right.getFrom()); + double mergedTo = Math.min(left.getTo(), right.getTo()); + if (mergedFrom > Double.NEGATIVE_INFINITY && mergedTo < Double.POSITIVE_INFINITY) { + // Closed range: both bounds are finite + builder.addRange(key, mergedFrom, mergedTo); + } else if (mergedFrom > Double.NEGATIVE_INFINITY) { + // Unbounded from: only lower bound (e.g., x >= 10) + builder.addUnboundedFrom(key, mergedFrom); + } else if (mergedTo < Double.POSITIVE_INFINITY) { + // Unbounded to: only upper bound (e.g., x < 50) + builder.addUnboundedTo(key, mergedTo); + } // If no overlapping, do nothing + } + + private static void or( + RangeAggregationBuilder builder, RangeAggregator.Range left, RangeAggregator.Range right, String key) { + // sort left and right by swapping if necessary + if(right.getFrom() < left.getFrom() || (left.getFrom() == right.getFrom() && right.getTo() < left.getTo())) { + var tmp = right; + right = left; + left = tmp; + } + boolean overlap = left.getTo() > right.getFrom(); + if (overlap) { + // Ranges overlap, meaning they cover all ranges - add both unbounded ranges + double mergedFrom = Math.min(left.getFrom(), right.getFrom()); + double mergedTo = Math.max(left.getTo(), right.getTo()); + builder.addRange(key, mergedFrom, mergedTo); + } else { + 
builder.addRange(left); + builder.addRange(right); + } + } + + private String getFieldName(RexInputRef field) { + return rowType.getFieldNames().get(field.getIndex()); + } + + private static void throwUnsupported() { + throw new UnsupportedOperationException("Cannot create range aggregator"); + } + + private static void throwUnsupported(String message) { + throw new UnsupportedOperationException("Cannot create range aggregator: " + message); + } +} \ No newline at end of file diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/response/agg/RangeParser.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/response/agg/RangeParser.java new file mode 100644 index 00000000000..8104656feb0 --- /dev/null +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/response/agg/RangeParser.java @@ -0,0 +1,60 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.opensearch.response.agg; + +import java.util.LinkedHashMap; +import java.util.Map; +import lombok.EqualsAndHashCode; +import lombok.Getter; +import lombok.RequiredArgsConstructor; +import org.opensearch.search.aggregations.Aggregation; +import org.opensearch.search.aggregations.bucket.range.Range; + +/** + * Parser for {@link Range} aggregations + * (org.opensearch.search.aggregations.bucket.range.InternalRange). Parses range bucket aggregations + * and returns a map with range keys and their document counts. 
+ */ +@EqualsAndHashCode +@RequiredArgsConstructor +public class RangeParser implements MetricParser { + + @Getter private final String name; + + @Override + public Map parse(Aggregation aggregation) { + Range rangeAgg = (Range) aggregation; + Map result = new LinkedHashMap<>(); + + for (Range.Bucket bucket : rangeAgg.getBuckets()) { + String key = bucket.getKeyAsString(); + if (key == null || key.isEmpty()) { + // Generate key from range bounds if no explicit key + key = generateRangeKey(bucket); + } + if (bucket.getAggregations() != null && bucket.getDocCount() > 0) { + result.put(key, bucket.getAggregations()); + } + } + return result; + } + + /** + * Generates a human-readable key for range buckets without explicit keys. + * + * @param bucket the range bucket + * @return formatted range key (e.g., "10.0-20.0", "*-10.0", "20.0-*") + */ + private String generateRangeKey(Range.Bucket bucket) { + Object from = bucket.getFrom(); + Object to = bucket.getTo(); + + String fromStr = (from == null) ? "*" : from.toString(); + String toStr = (to == null) ? 
"*" : to.toString(); + + return fromStr + "-" + toStr; + } +} diff --git a/ppl/src/main/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizer.java b/ppl/src/main/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizer.java index d5c55d10258..bd4c7d5fca5 100644 --- a/ppl/src/main/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizer.java +++ b/ppl/src/main/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizer.java @@ -831,7 +831,7 @@ public String visitExistsSubquery(ExistsSubquery node, String context) { @Override public String visitCase(Case node, String context) { StringBuilder builder = new StringBuilder(); - builder.append("cast("); + builder.append("case("); for (When when : node.getWhenClauses()) { builder.append(analyze(when.getCondition(), context)); builder.append(","); diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizerTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizerTest.java index 5dfc73f5483..0f24dc8e0c6 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizerTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizerTest.java @@ -511,13 +511,13 @@ public void testScalarSubquery() { @Test public void testCaseWhen() { assertEquals( - "source=t | eval level=cast(score >= ***,***,score >= *** and score < ***,*** else ***) |" + "source=t | eval level=case(score >= ***,***,score >= *** and score < ***,*** else ***) |" + " fields + level", anonymize( "source=t | eval level=CASE(score >= 90, 'A', score >= 80 AND score < 90, 'B' else 'C')" + " | fields level")); assertEquals( - "source=t | eval level=cast(score >= ***,***,score >= *** and score < ***,***) | fields +" + "source=t | eval level=case(score >= ***,***,score >= *** and score < ***,***) | fields +" + " level", anonymize( "source=t | eval level=CASE(score >= 90, 'A', score >= 80 AND score < 90, 'B')" From 6afdcb6e4c427bb2399d294761400a8c5c43f3cc Mon Sep 17 00:00:00 2001 From: 
Yuanchun Shen Date: Fri, 26 Sep 2025 13:57:46 +0800 Subject: [PATCH 02/26] Correct case analyzer Signed-off-by: Yuanchun Shen --- .../opensearch/request/CaseRangeAnalyzer.java | 223 ++++++++++-------- 1 file changed, 125 insertions(+), 98 deletions(-) diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/request/CaseRangeAnalyzer.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/request/CaseRangeAnalyzer.java index 3a7e75d90f5..65f5d5792e8 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/request/CaseRangeAnalyzer.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/request/CaseRangeAnalyzer.java @@ -5,16 +5,14 @@ package org.opensearch.sql.opensearch.request; -import java.math.BigDecimal; -import java.util.List; -import java.util.Objects; -import java.util.Optional; - import com.google.common.collect.BoundType; import com.google.common.collect.Range; import com.google.common.collect.RangeSet; import com.google.common.collect.TreeRangeSet; -import lombok.RequiredArgsConstructor; +import java.math.BigDecimal; +import java.util.List; +import java.util.Objects; +import java.util.Optional; import org.apache.calcite.rel.type.RelDataType; import org.apache.calcite.rex.RexCall; import org.apache.calcite.rex.RexInputRef; @@ -26,26 +24,33 @@ import org.apache.calcite.util.Sarg; import org.opensearch.search.aggregations.AggregationBuilders; import org.opensearch.search.aggregations.bucket.range.RangeAggregationBuilder; -import org.opensearch.search.aggregations.bucket.range.RangeAggregator; /** * Analyzer to detect CASE expressions that can be converted to OpenSearch range aggregations. * *

Strict validation rules: + * *

    - *
  • All conditions must compare the same field with literals
  • - *
  • Only simple comparison operators (>, >=, <, <=) are allowed
  • - *
  • Ranges must be non-overlapping and contiguous
  • - *
  • Return values must be string literals
  • + *
  • All conditions must compare the same field with literals + *
  • Only closed-open, at-least, and less-than ranges are allowed + *
  • Return values must be string literals *
*/ public class CaseRangeAnalyzer { + /** The default key to use if there isn't a key specified for the else case */ + public static final String DEFAULT_ELSE_KEY = "null"; + + /** The name for the range aggregation */ + public static final String NAME = "case_range"; + private final RelDataType rowType; - private final RangeSet rangeSet; + private final RangeSet takenRange; + private final RangeAggregationBuilder builder; public CaseRangeAnalyzer(RelDataType rowType) { this.rowType = rowType; - this.rangeSet = TreeRangeSet.create(); + this.takenRange = TreeRangeSet.create(); + this.builder = AggregationBuilders.range(NAME); } /** @@ -70,28 +75,34 @@ public Optional analyze(RexCall caseCall) { } List operands = caseCall.getOperands(); - RangeAggregationBuilder aggregationBuilder = AggregationBuilders.range("case_range"); // Process WHEN-THEN pairs for (int i = 0; i < operands.size() - 1; i += 2) { RexNode condition = operands.get(i); - RexNode result = operands.get(i + 1); + RexNode expr = operands.get(i + 1); // Result must be a literal - if (!(result instanceof RexLiteral)) { + if (!(expr instanceof RexLiteral)) { return Optional.empty(); } - String key = ((RexLiteral) result).getValueAs(String.class); - analyzeCondition(aggregationBuilder, condition, key); + String key = ((RexLiteral) expr).getValueAs(String.class); + analyzeCondition(condition, key); } // Check ELSE clause - // TODO: Currently, we ignore else clause - // Process the case without else clause and check range completeness later - return Optional.of(aggregationBuilder); + RexNode elseExpr = operands.getLast(); + String elseKey; + if (RexLiteral.isNullLiteral(elseExpr)) { + // range key doesn't support values of type: VALUE_NULL + elseKey = DEFAULT_ELSE_KEY; + } else { + elseKey = ((RexLiteral) elseExpr).getValueAs(String.class); + } + addRangeSet(elseKey, takenRange.complement()); + return Optional.of(builder); } /** Analyzes a single condition in the CASE WHEN clause. 
*/ - private void analyzeCondition(RangeAggregationBuilder builder, RexNode condition, String key) { + private void analyzeCondition(RexNode condition, String key) { if (!(condition instanceof RexCall)) { throwUnsupported("condition must be a RexCall"); } @@ -100,28 +111,35 @@ private void analyzeCondition(RangeAggregationBuilder builder, RexNode condition SqlKind kind = call.getKind(); // Handle simple comparisons - if (kind == SqlKind.GREATER_THAN_OR_EQUAL || kind == SqlKind.LESS_THAN || kind == SqlKind.LESS_THAN_OR_EQUAL || kind == SqlKind.GREATER_THAN) { - builder.addRange(analyzeSimpleComparison(builder, call, key)); + if (kind == SqlKind.GREATER_THAN_OR_EQUAL + || kind == SqlKind.LESS_THAN + || kind == SqlKind.LESS_THAN_OR_EQUAL + || kind == SqlKind.GREATER_THAN) { + analyzeSimpleComparison(call, key); + } else if (kind == SqlKind.SEARCH) { + analyzeSearchCondition(call, key); } - // Handle AND conditions (for range conditions like x >= 10 AND x < 100) + // AND / OR will only appear when users try to create a complex condition on multiple fields + // E.g. (a > 3 and b < 5). Otherwise, the complex conditions will be converted to a SEARCH call. 
else if (kind == SqlKind.AND || kind == SqlKind.OR) { - analyzeCompositeCondition(builder, call, key); - } else if (kind == SqlKind.SEARCH) { - analyzeSearchCondition(builder, call, key); + throwUnsupported("Range queries must be performed on the same field"); + } else { + throwUnsupported("Can not analyze condition as a range query: " + call); } } - private RangeAggregator.Range analyzeSimpleComparison(RangeAggregationBuilder builder, RexCall call, String key) { + private void analyzeSimpleComparison(RexCall call, String key) { List operands = call.getOperands(); if (operands.size() != 2 || !(call.getOperator() instanceof SqlBinaryOperator)) { throwUnsupported(); } RexNode left = operands.get(0); RexNode right = operands.get(1); - SqlOperator operator = call.getOperator(); + SqlOperator operator = call.getOperator(); RexInputRef inputRef = null; RexLiteral literal = null; + // Swap inputRef to the left if necessary if (left instanceof RexInputRef && right instanceof RexLiteral) { inputRef = (RexInputRef) left; literal = (RexLiteral) right; @@ -145,91 +163,93 @@ private RangeAggregator.Range analyzeSimpleComparison(RangeAggregationBuilder bu } Double value = literal.getValueAs(Double.class); - return switch (operator.getKind()) { - case GREATER_THAN_OR_EQUAL -> new RangeAggregator.Range(key, value, null); - case LESS_THAN -> new RangeAggregator.Range(key, null, value); - default -> throw new UnsupportedOperationException("ranges must equivalents of field >= constant or field < constant"); - }; - } - - private void analyzeCompositeCondition(RangeAggregationBuilder builder, RexCall compositeCall, String key) { - RexNode left = compositeCall.getOperands().get(0); - RexNode right = compositeCall.getOperands().get(1); - - if (!(left instanceof RexCall && right instanceof RexCall && ((RexCall) left).getOperator() instanceof SqlBinaryOperator && ((RexCall) right).getOperator() instanceof SqlBinaryOperator)) { - throwUnsupported("cannot analyze deep nested comparison"); + 
if (value == null) { + throwUnsupported("Cannot parse value for comparison"); } - - // For AND conditions, we need to analyze them separately and combine - // Create temporary ranges to analyze the conditions - RangeAggregator.Range leftRange = analyzeSimpleComparison(builder, (RexCall) left, key); - RangeAggregator.Range rightRange = analyzeSimpleComparison(builder, (RexCall) right, key); - - // Combine into single range - if (compositeCall.getKind() == SqlKind.AND) { - and(builder, leftRange, rightRange, key); - } else if (compositeCall.getKind() == SqlKind.OR) { - or(builder, leftRange, rightRange, key); + switch (operator.getKind()) { + case GREATER_THAN_OR_EQUAL -> { + addFrom(key, value); + } + case LESS_THAN -> { + addTo(key, value); + } + default -> throw new UnsupportedOperationException( + "ranges must equivalents of field >= constant or field < constant"); } + ; } - private void analyzeSearchCondition(RangeAggregationBuilder builder, RexCall searchCall, String key) { + private void analyzeSearchCondition(RexCall searchCall, String key) { RexNode field = searchCall.getOperands().getFirst(); - if (!(field instanceof RexInputRef) || !Objects.equals(getFieldName((RexInputRef) field), builder.field())) { + if (!(field instanceof RexInputRef)) { + throwUnsupported("Range query must be performed on a field"); + } + String fieldName = getFieldName((RexInputRef) field); + if (builder.field() == null) { + builder.field(fieldName); + } else if (!Objects.equals(builder.field(), fieldName)) { throwUnsupported("Range query must be performed on the same field"); } RexLiteral literal = (RexLiteral) searchCall.getOperands().getLast(); Sarg sarg = literal.getValueAs(Sarg.class); - for(Object r: sarg.rangeSet.asRanges()){ + for (Object r : sarg.rangeSet.asRanges()) { Range range = (Range) r; - if ((range.hasLowerBound() && range.lowerBoundType() != BoundType.CLOSED) || (range.hasUpperBound() && range.upperBoundType() != BoundType.OPEN)){ - throwUnsupported("Range query 
only supports closed-open ranges"); - } + validateRange(range); if (!range.hasLowerBound() && range.hasUpperBound()) { - builder.addUnboundedTo(key, range.upperEndpoint().doubleValue()); + // It will be Double.MAX_VALUE if be big decimal is greater than Double.MAX_VALUE + double upper = range.upperEndpoint().doubleValue(); + addTo(key, upper); } else if (range.hasLowerBound() && !range.hasUpperBound()) { - builder.addUnboundedFrom(key, range.lowerEndpoint().doubleValue()); + double lower = range.lowerEndpoint().doubleValue(); + addFrom(key, lower); } else if (range.hasLowerBound()) { - builder.addRange(key, range.lowerEndpoint().doubleValue(), range.upperEndpoint().doubleValue()); + double lower = range.lowerEndpoint().doubleValue(); + double upper = range.upperEndpoint().doubleValue(); + addBetween(key, lower, upper); } else { - builder.addRange(key, Double.NEGATIVE_INFINITY, Double.POSITIVE_INFINITY); + addBetween(key, Double.NEGATIVE_INFINITY, Double.POSITIVE_INFINITY); } } } - private static void and( - RangeAggregationBuilder builder, RangeAggregator.Range left, RangeAggregator.Range right, String key) { - double mergedFrom = Math.max(left.getFrom(), right.getFrom()); - double mergedTo = Math.min(left.getTo(), right.getTo()); - if (mergedFrom > Double.NEGATIVE_INFINITY && mergedTo < Double.POSITIVE_INFINITY) { - // Closed range: both bounds are finite - builder.addRange(key, mergedFrom, mergedTo); - } else if (mergedFrom > Double.NEGATIVE_INFINITY) { - // Unbounded from: only lower bound (e.g., x >= 10) - builder.addUnboundedFrom(key, mergedFrom); - } else if (mergedTo < Double.POSITIVE_INFINITY) { - // Unbounded to: only upper bound (e.g., x < 50) - builder.addUnboundedTo(key, mergedTo); - } // If no overlapping, do nothing - } - - private static void or( - RangeAggregationBuilder builder, RangeAggregator.Range left, RangeAggregator.Range right, String key) { - // sort left and right by swapping if necessary - if(right.getFrom() < left.getFrom() || 
(left.getFrom() == right.getFrom() && right.getTo() < left.getTo())) { - var tmp = right; - right = left; - left = tmp; - } - boolean overlap = left.getTo() > right.getFrom(); - if (overlap) { - // Ranges overlap, meaning they cover all ranges - add both unbounded ranges - double mergedFrom = Math.min(left.getFrom(), right.getFrom()); - double mergedTo = Math.max(left.getTo(), right.getTo()); - builder.addRange(key, mergedFrom, mergedTo); + private void addFrom(String key, Double value) { + var from = Range.atLeast(value); + updateRange(key, from); + } + + private void addTo(String key, Double value) { + var to = Range.lessThan(value); + updateRange(key, to); + } + + private void addBetween(String key, Double from, Double to) { + var range = Range.closedOpen(from, to); + updateRange(key, range); + } + + private void updateRange(String key, Range range) { + // The range to add: remaining space ∩ new range + RangeSet toAdd = takenRange.complement().subRangeSet(range); + addRangeSet(key, toAdd); + takenRange.add(range); + } + + // Add range set without updating taken range + private void addRangeSet(String key, RangeSet rangeSet) { + rangeSet.asRanges().forEach(range -> addRange(key, range)); + } + + // Add range without updating taken range + private void addRange(String key, Range range) { + validateRange(range); + if (range.hasLowerBound() && range.hasUpperBound()) { + builder.addRange(key, range.lowerEndpoint(), range.upperEndpoint()); + } else if (range.hasLowerBound()) { + builder.addUnboundedFrom(key, range.lowerEndpoint()); + } else if (range.hasUpperBound()) { + builder.addUnboundedTo(key, range.upperEndpoint()); } else { - builder.addRange(left); - builder.addRange(right); + builder.addRange(key, Double.NEGATIVE_INFINITY, Double.POSITIVE_INFINITY); } } @@ -237,11 +257,18 @@ private String getFieldName(RexInputRef field) { return rowType.getFieldNames().get(field.getIndex()); } + private static void validateRange(Range range) { + if ((range.hasLowerBound() && 
range.lowerBoundType() != BoundType.CLOSED) + || (range.hasUpperBound() && range.upperBoundType() != BoundType.OPEN)) { + throwUnsupported("Range query only supports closed-open ranges"); + } + } + private static void throwUnsupported() { - throw new UnsupportedOperationException("Cannot create range aggregator"); + throw new UnsupportedOperationException("Cannot create range aggregator from case"); } private static void throwUnsupported(String message) { throw new UnsupportedOperationException("Cannot create range aggregator: " + message); } -} \ No newline at end of file +} From f416deed3b86ad6cd8684ae9926c74a67cffd48b Mon Sep 17 00:00:00 2001 From: Yuanchun Shen Date: Fri, 26 Sep 2025 17:23:05 +0800 Subject: [PATCH 03/26] Create bucket aggregation parsers that supports parsing nested sub aggregations Signed-off-by: Yuanchun Shen --- .../remote/CalcitePPLCaseFunctionIT.java | 49 ++++--------- .../opensearch/request/AggregateAnalyzer.java | 54 ++++++++------ .../opensearch/request/CaseRangeAnalyzer.java | 11 ++- .../response/agg/BucketAggregationParser.java | 70 ++++++++++++------ .../agg/LeafBucketAggregationParser.java | 72 +++++++++++++++++++ .../opensearch/response/agg/RangeParser.java | 60 ---------------- ...enSearchAggregationResponseParserTest.java | 10 +-- 7 files changed, 176 insertions(+), 150 deletions(-) create mode 100644 opensearch/src/main/java/org/opensearch/sql/opensearch/response/agg/LeafBucketAggregationParser.java delete mode 100644 opensearch/src/main/java/org/opensearch/sql/opensearch/response/agg/RangeParser.java diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLCaseFunctionIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLCaseFunctionIT.java index 42dc35a29bf..52a92c0956b 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLCaseFunctionIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLCaseFunctionIT.java @@ -5,9 +5,9 
@@ package org.opensearch.sql.calcite.remote; -import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_WEBLOGS; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertTrue; +import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_WEBLOGS; import static org.opensearch.sql.util.MatcherUtils.rows; import static org.opensearch.sql.util.MatcherUtils.schema; import static org.opensearch.sql.util.MatcherUtils.verifyDataRows; @@ -264,10 +264,7 @@ public void testCaseRangeAggregationPushdown() throws IOException { + ") | stats count() as total by range_bucket | sort range_bucket", TEST_INDEX_WEBLOGS)); - verifySchema( - actual, - schema("range_bucket", "string"), - schema("total", "long")); + verifySchema(actual, schema("range_bucket", "string"), schema("total", "long")); // This should work but won't be optimized due to implicit NULL bucket assertTrue(actual.getJSONArray("datarows").length() > 0); @@ -279,12 +276,10 @@ public void testCaseRangeAggregationWithMetrics() throws IOException { JSONObject actual = executeQuery( String.format( - "source=%s | eval size_category = case(" - + " cast(bytes as int) < 2000, 'small'," - + " cast(bytes as int) >= 2000 AND cast(bytes as int) < 5000, 'medium'," - + " cast(bytes as int) >= 5000, 'large'" - + ") | stats count() as total, avg(cast(bytes as int)) as avg_bytes by size_category" - + " | sort size_category", + "source=%s | eval size_category = case( cast(bytes as int) < 2000, 'small', " + + " cast(bytes as int) >= 2000 AND cast(bytes as int) < 5000, 'medium', " + + " cast(bytes as int) >= 5000, 'large') | stats count() as total," + + " avg(cast(bytes as int)) as avg_bytes by size_category | sort size_category", TEST_INDEX_WEBLOGS)); verifySchema( @@ -304,19 +299,14 @@ public void testCaseRangeAggregationWithElse() throws IOException { JSONObject actual = executeQuery( String.format( - "source=%s | eval status_category = case(" - + " cast(response as 
int) < 300, 'success'," - + " cast(response as int) >= 300 AND cast(response as int) < 400, 'redirect'," - + " cast(response as int) >= 400 AND cast(response as int) < 500, 'client_error'," - + " cast(response as int) >= 500, 'server_error'" - + " else 'unknown'" - + ") | stats count() by status_category | sort status_category", + "source=%s | eval status_category = case( cast(response as int) < 300, 'success', " + + " cast(response as int) >= 300 AND cast(response as int) < 400, 'redirect', " + + " cast(response as int) >= 400 AND cast(response as int) < 500," + + " 'client_error', cast(response as int) >= 500, 'server_error' else" + + " 'unknown') | stats count() by status_category | sort status_category", TEST_INDEX_WEBLOGS)); - verifySchema( - actual, - schema("status_category", "string"), - schema("count()", "long")); + verifySchema(actual, schema("status_category", "string"), schema("count()", "long")); // Should handle the ELSE case for null/non-numeric responses assertTrue(actual.getJSONArray("datarows").length() > 0); @@ -335,10 +325,7 @@ public void testNonOptimizableCaseExpression() throws IOException { + ") | stats count() by mixed_condition", TEST_INDEX_WEBLOGS)); - verifySchema( - actual, - schema("mixed_condition", "string"), - schema("count()", "long")); + verifySchema(actual, schema("mixed_condition", "string"), schema("count()", "long")); // This should work but won't be optimized assertTrue(actual.getJSONArray("datarows").length() > 0); @@ -356,10 +343,7 @@ public void testCaseWithNonLiteralResult() throws IOException { + ") | stats count() by computed_result | head 3", TEST_INDEX_WEBLOGS)); - verifySchema( - actual, - schema("computed_result", "string"), - schema("count()", "long")); + verifySchema(actual, schema("computed_result", "string"), schema("count()", "long")); // This should work but won't be optimized to range aggregation assertTrue(actual.getJSONArray("datarows").length() > 0); @@ -379,10 +363,7 @@ public void 
testOptimizableCaseRangeAggregation() throws IOException { + ") | stats count() by size_bucket | sort size_bucket", TEST_INDEX_WEBLOGS)); - verifySchema( - actual, - schema("size_bucket", "string"), - schema("count()", "long")); + verifySchema(actual, schema("size_bucket", "string"), schema("count()", "long")); // This should work - the explicit ELSE makes it potentially optimizable assertTrue(actual.getJSONArray("datarows").length() > 0); diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/request/AggregateAnalyzer.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/request/AggregateAnalyzer.java index 65355a809b1..a1dbb615f8e 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/request/AggregateAnalyzer.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/request/AggregateAnalyzer.java @@ -41,7 +41,6 @@ import java.util.Set; import java.util.function.Function; import java.util.stream.Collectors; - import lombok.RequiredArgsConstructor; import org.apache.calcite.plan.RelOptCluster; import org.apache.calcite.rel.core.Aggregate; @@ -60,10 +59,10 @@ import org.opensearch.search.aggregations.AggregatorFactories; import org.opensearch.search.aggregations.AggregatorFactories.Builder; import org.opensearch.search.aggregations.BucketOrder; -import org.opensearch.search.aggregations.bucket.range.RangeAggregationBuilder; import org.opensearch.search.aggregations.bucket.composite.CompositeValuesSourceBuilder; import org.opensearch.search.aggregations.bucket.composite.TermsValuesSourceBuilder; import org.opensearch.search.aggregations.bucket.missing.MissingOrder; +import org.opensearch.search.aggregations.bucket.range.RangeAggregationBuilder; import org.opensearch.search.aggregations.bucket.terms.TermsAggregationBuilder; import org.opensearch.search.aggregations.metrics.ExtendedStats; import org.opensearch.search.aggregations.metrics.PercentilesAggregationBuilder; @@ -81,11 +80,10 @@ import 
org.opensearch.sql.opensearch.request.PredicateAnalyzer.NamedFieldExpression; import org.opensearch.sql.opensearch.response.agg.ArgMaxMinParser; import org.opensearch.sql.opensearch.response.agg.BucketAggregationParser; -import org.opensearch.sql.opensearch.response.agg.CompositeAggregationParser; +import org.opensearch.sql.opensearch.response.agg.LeafBucketAggregationParser; import org.opensearch.sql.opensearch.response.agg.MetricParser; import org.opensearch.sql.opensearch.response.agg.NoBucketAggregationParser; import org.opensearch.sql.opensearch.response.agg.OpenSearchAggregationResponseParser; -import org.opensearch.sql.opensearch.response.agg.RangeParser; import org.opensearch.sql.opensearch.response.agg.SinglePercentileParser; import org.opensearch.sql.opensearch.response.agg.SingleValueParser; import org.opensearch.sql.opensearch.response.agg.StatsParser; @@ -204,11 +202,19 @@ public static Pair, OpenSearchAggregationResponseParser Builder metricBuilder = builderAndParser.getLeft(); List metricParsers = builderAndParser.getRight(); // Find group by fields derived from CASE functions and convert them to range queries - CaseRangeAnalyzer rangeAnalyzer = CaseRangeAnalyzer.create(rowType); - List> groupsByCase = groupList.stream() - .map(i -> Pair.of(i, project.getProjects().get(i))) - .filter(p -> p.getRight() instanceof RexCall rexCall && rexCall.getKind() == SqlKind.CASE) - .map(p -> Pair.of(p.getLeft(), rangeAnalyzer.analyze((RexCall) p.getRight()))) + List> groupsByCase = + groupList.stream() + .map(i -> Pair.of(i, project.getNamedProjects().get(i))) + .filter( + p -> + p.getRight().getKey() instanceof RexCall rexCall + && rexCall.getKind() == SqlKind.CASE) + .map( + p -> + Pair.of( + p.getLeft(), + CaseRangeAnalyzer.create(p.getRight().getValue(), rowType) + .analyze((RexCall) p.getRight().getKey()))) .filter(p -> p.getRight().isPresent()) .map(p -> Pair.of(p.getLeft(), p.getRight().get())) .toList(); @@ -220,28 +226,32 @@ public static Pair, 
OpenSearchAggregationResponseParser // Note that but a composite aggregation can not be a sub aggregation of range aggregation, // but range aggregation can be a sub aggregation of a composite aggregation. AggregationBuilder rangeAggregationBuilder = null; + BucketAggregationParser bucketAggregationParser = null; if (!groupsByCase.isEmpty()) { for (int i = 0; i < groupsByCase.size(); i++) { Pair pair = groupsByCase.get(i); if (i == 0) { rangeAggregationBuilder = pair.getRight(); + bucketAggregationParser = + new BucketAggregationParser(new LeafBucketAggregationParser(metricParsers)); } else { groupsByCase.get(i - 1).getRight().subAggregation(pair.getRight()); + bucketAggregationParser = new BucketAggregationParser(bucketAggregationParser); } } groupsByCase.getLast().getRight().subAggregations(metricBuilder); - metricParsers.add(new RangeParser("case_range")); } // Remove groups that are converted to ranges from groupList Set toRemove = groupsByCase.stream().map(Pair::getLeft).collect(Collectors.toSet()); - List filteredGroupList = groupList.stream().filter(i -> !toRemove.contains(i)).toList(); + List filteredGroupList = + groupList.stream().filter(i -> !toRemove.contains(i)).toList(); // The top-level query is a range query: stats count() by range_field // RangeAgg // Metric if (!groupsByCase.isEmpty() && filteredGroupList.isEmpty()) { - return Pair.of(List.of(rangeAggregationBuilder), new BucketAggregationParser(metricParsers)); + return Pair.of(List.of(rangeAggregationBuilder), bucketAggregationParser); } // No parent composite aggregation or range aggregation is attached: stats count() // Metric @@ -250,21 +260,23 @@ else if (aggregate.getGroupSet().isEmpty() && filteredGroupList.isEmpty()) { ImmutableList.copyOf(metricBuilder.getAggregatorFactories()), new NoBucketAggregationParser(metricParsers)); } - // It has both composite aggregation and range aggregation: stats count() by range_field, non_range_field + // It has both composite aggregation and range 
aggregation: stats count() by range_field, + // non_range_field // CompositeAgg // RangeAgg // Metric else if (!groupsByCase.isEmpty()) { List> buckets = - createCompositeBuckets(filteredGroupList, project, helper); + createCompositeBuckets(filteredGroupList, project, helper); return Pair.of( - Collections.singletonList( - AggregationBuilders.composite("composite_buckets", buckets) - .subAggregation(rangeAggregationBuilder) - .size(AGGREGATION_BUCKET_SIZE)), - new CompositeAggregationParser(metricParsers)); + Collections.singletonList( + AggregationBuilders.composite("composite_buckets", buckets) + .subAggregation(rangeAggregationBuilder) + .size(AGGREGATION_BUCKET_SIZE)), + new BucketAggregationParser(bucketAggregationParser)); } - // It does not have range aggregation, but has composite aggregation: stats count() by non_range_field + // It does not have range aggregation, but has composite aggregation: stats count() by + // non_range_field // CompositeAgg // Metric else { @@ -275,7 +287,7 @@ else if (!groupsByCase.isEmpty()) { AggregationBuilders.composite("composite_buckets", buckets) .subAggregations(metricBuilder) .size(AGGREGATION_BUCKET_SIZE)), - new CompositeAggregationParser(metricParsers)); + new BucketAggregationParser(new LeafBucketAggregationParser(metricParsers))); } } catch (Throwable e) { Throwables.throwIfInstanceOf(e, UnsupportedOperationException.class); diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/request/CaseRangeAnalyzer.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/request/CaseRangeAnalyzer.java index 65f5d5792e8..921ccb649ed 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/request/CaseRangeAnalyzer.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/request/CaseRangeAnalyzer.java @@ -40,17 +40,14 @@ public class CaseRangeAnalyzer { /** The default key to use if there isn't a key specified for the else case */ public static final String DEFAULT_ELSE_KEY = "null"; - /** 
The name for the range aggregation */ - public static final String NAME = "case_range"; - private final RelDataType rowType; private final RangeSet takenRange; private final RangeAggregationBuilder builder; - public CaseRangeAnalyzer(RelDataType rowType) { + public CaseRangeAnalyzer(String name, RelDataType rowType) { this.rowType = rowType; this.takenRange = TreeRangeSet.create(); - this.builder = AggregationBuilders.range(NAME); + this.builder = AggregationBuilders.range(name).keyed(true); } /** @@ -59,8 +56,8 @@ public CaseRangeAnalyzer(RelDataType rowType) { * @param rowType the row type information for field resolution * @return a new CaseRangeAnalyzer instance */ - public static CaseRangeAnalyzer create(RelDataType rowType) { - return new CaseRangeAnalyzer(rowType); + public static CaseRangeAnalyzer create(String name, RelDataType rowType) { + return new CaseRangeAnalyzer(name, rowType); } /** diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/response/agg/BucketAggregationParser.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/response/agg/BucketAggregationParser.java index 149480b39ab..bab5a4e27fa 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/response/agg/BucketAggregationParser.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/response/agg/BucketAggregationParser.java @@ -5,43 +5,67 @@ package org.opensearch.sql.opensearch.response.agg; -import java.util.Arrays; -import java.util.LinkedHashMap; +import java.util.ArrayList; +import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.stream.Collectors; -import lombok.EqualsAndHashCode; import org.opensearch.search.aggregations.Aggregation; import org.opensearch.search.aggregations.Aggregations; import org.opensearch.search.aggregations.bucket.MultiBucketsAggregation; +import org.opensearch.search.aggregations.bucket.composite.CompositeAggregation; -/** - * Use BucketAggregationParser only when there is a single 
group-by key, it returns multiple - * buckets. {@link CompositeAggregationParser} is used for multiple group by keys - */ -@EqualsAndHashCode public class BucketAggregationParser implements OpenSearchAggregationResponseParser { - private final MetricParserHelper metricsParser; - - public BucketAggregationParser(MetricParser... metricParserList) { - metricsParser = new MetricParserHelper(Arrays.asList(metricParserList)); - } + private final OpenSearchAggregationResponseParser subAggParser; - public BucketAggregationParser(List metricParserList) { - metricsParser = new MetricParserHelper(metricParserList); + public BucketAggregationParser(OpenSearchAggregationResponseParser subAggParser) { + this.subAggParser = subAggParser; } @Override public List> parse(Aggregations aggregations) { - Aggregation agg = aggregations.asList().getFirst(); - return ((MultiBucketsAggregation) agg) - .getBuckets().stream().map(b -> parse(b, agg.getName())).collect(Collectors.toList()); + if (subAggParser instanceof BucketAggregationParser) { + return aggregations.asList().stream() + .map( + aggregation -> { + if (aggregation instanceof CompositeAggregation) { + return (CompositeAggregation) aggregation; + } else { + return (MultiBucketsAggregation) aggregation; + } + }) + .map(MultiBucketsAggregation::getBuckets) + .flatMap(List::stream) + .map(this::parse) + .flatMap(List::stream) + .collect(Collectors.toList()); + } else if (subAggParser instanceof LeafBucketAggregationParser) { + return subAggParser.parse(aggregations); + } else { + throw new IllegalStateException( + "Sub parsers of a BucketAggregationParser can only be either BucketAggregationParser or" + + " LeafBucketAggregationParser"); + } + } + + private List> parse(MultiBucketsAggregation.Bucket bucket) { + if (bucket instanceof CompositeAggregation.Bucket compositeBucket) { + return parse(compositeBucket); + } + List> results = new ArrayList<>(); + for (Aggregation subAgg : bucket.getAggregations()) { + var sub = (Aggregations) 
subAgg; + results.addAll(subAggParser.parse(sub)); + } + return results; } - private Map parse(MultiBucketsAggregation.Bucket bucket, String keyName) { - Map resultMap = new LinkedHashMap<>(); - resultMap.put(keyName, bucket.getKey()); - resultMap.putAll(metricsParser.parse(bucket.getAggregations())); - return resultMap; + private List> parse(CompositeAggregation.Bucket bucket) { + Map common = new HashMap<>(bucket.getKey()); + List> results = subAggParser.parse(bucket.getAggregations()); + for (Map r : results) { + r.putAll(common); + } + return results; } } diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/response/agg/LeafBucketAggregationParser.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/response/agg/LeafBucketAggregationParser.java new file mode 100644 index 00000000000..0ba384d8989 --- /dev/null +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/response/agg/LeafBucketAggregationParser.java @@ -0,0 +1,72 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.opensearch.response.agg; + +import java.util.Arrays; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Objects; +import java.util.stream.Collectors; +import lombok.EqualsAndHashCode; +import org.opensearch.search.aggregations.Aggregation; +import org.opensearch.search.aggregations.Aggregations; +import org.opensearch.search.aggregations.bucket.MultiBucketsAggregation; +import org.opensearch.search.aggregations.bucket.composite.CompositeAggregation; +import org.opensearch.search.aggregations.bucket.range.Range; + +/** + * Use BucketAggregationParser only when there is a single group-by key, it returns multiple + * buckets. 
{@link CompositeAggregationParser} is used for multiple group by keys + */ +@EqualsAndHashCode +public class LeafBucketAggregationParser implements OpenSearchAggregationResponseParser { + private final MetricParserHelper metricsParser; + + public LeafBucketAggregationParser(MetricParser... metricParserList) { + metricsParser = new MetricParserHelper(Arrays.asList(metricParserList)); + } + + public LeafBucketAggregationParser(List metricParserList) { + metricsParser = new MetricParserHelper(metricParserList); + } + + @Override + public List> parse(Aggregations aggregations) { + Aggregation agg = aggregations.asList().getFirst(); + return ((MultiBucketsAggregation) agg) + .getBuckets().stream() + .map(b -> parse(b, agg.getName())) + .filter(Objects::nonNull) + .collect(Collectors.toList()); + } + + private Map parse(MultiBucketsAggregation.Bucket bucket, String name) { + if (bucket instanceof CompositeAggregation.Bucket compositeBucket) { + return parse(compositeBucket); + } else if (bucket instanceof Range.Bucket rangeBucket) { + return parse(rangeBucket, name); + } + return metricsParser.parse(bucket.getAggregations()); + } + + private Map parse(CompositeAggregation.Bucket bucket) { + Map resultMap = new HashMap<>(); + resultMap.putAll(bucket.getKey()); + resultMap.putAll(metricsParser.parse(bucket.getAggregations())); + return resultMap; + } + + private Map parse(Range.Bucket bucket, String name) { + if (bucket.getDocCount() == 0) { + return null; + } + Map resultMap = new HashMap<>(); + resultMap.put(name, bucket.getKey()); + resultMap.putAll(metricsParser.parse(bucket.getAggregations())); + return resultMap; + } +} diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/response/agg/RangeParser.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/response/agg/RangeParser.java deleted file mode 100644 index 8104656feb0..00000000000 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/response/agg/RangeParser.java +++ /dev/null @@ 
-1,60 +0,0 @@ -/* - * Copyright OpenSearch Contributors - * SPDX-License-Identifier: Apache-2.0 - */ - -package org.opensearch.sql.opensearch.response.agg; - -import java.util.LinkedHashMap; -import java.util.Map; -import lombok.EqualsAndHashCode; -import lombok.Getter; -import lombok.RequiredArgsConstructor; -import org.opensearch.search.aggregations.Aggregation; -import org.opensearch.search.aggregations.bucket.range.Range; - -/** - * Parser for {@link Range} aggregations - * (org.opensearch.search.aggregations.bucket.range.InternalRange). Parses range bucket aggregations - * and returns a map with range keys and their document counts. - */ -@EqualsAndHashCode -@RequiredArgsConstructor -public class RangeParser implements MetricParser { - - @Getter private final String name; - - @Override - public Map parse(Aggregation aggregation) { - Range rangeAgg = (Range) aggregation; - Map result = new LinkedHashMap<>(); - - for (Range.Bucket bucket : rangeAgg.getBuckets()) { - String key = bucket.getKeyAsString(); - if (key == null || key.isEmpty()) { - // Generate key from range bounds if no explicit key - key = generateRangeKey(bucket); - } - if (bucket.getAggregations() != null && bucket.getDocCount() > 0) { - result.put(key, bucket.getAggregations()); - } - } - return result; - } - - /** - * Generates a human-readable key for range buckets without explicit keys. - * - * @param bucket the range bucket - * @return formatted range key (e.g., "10.0-20.0", "*-10.0", "20.0-*") - */ - private String generateRangeKey(Range.Bucket bucket) { - Object from = bucket.getFrom(); - Object to = bucket.getTo(); - - String fromStr = (from == null) ? "*" : from.toString(); - String toStr = (to == null) ? 
"*" : to.toString(); - - return fromStr + "-" + toStr; - } -} diff --git a/opensearch/src/test/java/org/opensearch/sql/opensearch/response/OpenSearchAggregationResponseParserTest.java b/opensearch/src/test/java/org/opensearch/sql/opensearch/response/OpenSearchAggregationResponseParserTest.java index 57f7c4ea044..0887ccab75b 100644 --- a/opensearch/src/test/java/org/opensearch/sql/opensearch/response/OpenSearchAggregationResponseParserTest.java +++ b/opensearch/src/test/java/org/opensearch/sql/opensearch/response/OpenSearchAggregationResponseParserTest.java @@ -22,9 +22,9 @@ import org.junit.jupiter.api.DisplayNameGenerator; import org.junit.jupiter.api.Test; import org.opensearch.search.aggregations.metrics.ExtendedStats; -import org.opensearch.sql.opensearch.response.agg.BucketAggregationParser; import org.opensearch.sql.opensearch.response.agg.CompositeAggregationParser; import org.opensearch.sql.opensearch.response.agg.FilterParser; +import org.opensearch.sql.opensearch.response.agg.LeafBucketAggregationParser; import org.opensearch.sql.opensearch.response.agg.NoBucketAggregationParser; import org.opensearch.sql.opensearch.response.agg.OpenSearchAggregationResponseParser; import org.opensearch.sql.opensearch.response.agg.PercentilesParser; @@ -88,7 +88,7 @@ void one_bucket_one_metric_should_pass() { + "}"; OpenSearchAggregationResponseParser parser = - new BucketAggregationParser(new SingleValueParser("avg")); + new LeafBucketAggregationParser(new SingleValueParser("avg")); assertThat( parse(parser, response), containsInAnyOrder( @@ -206,7 +206,7 @@ void filter_aggregation_group_by_should_pass() { + " }\n" + "}"; OpenSearchAggregationResponseParser parser = - new BucketAggregationParser( + new LeafBucketAggregationParser( FilterParser.builder() .name("filter") .metricsParser(new SingleValueParser("avg")) @@ -370,7 +370,7 @@ void one_bucket_one_metric_percentile_should_pass() { + "}"; OpenSearchAggregationResponseParser parser = - new BucketAggregationParser(new 
SinglePercentileParser("percentile")); + new LeafBucketAggregationParser(new SinglePercentileParser("percentile")); assertThat( parse(parser, response), containsInAnyOrder( @@ -495,7 +495,7 @@ void one_bucket_percentiles_should_pass() { + "}"; OpenSearchAggregationResponseParser parser = - new BucketAggregationParser(new PercentilesParser("percentiles")); + new LeafBucketAggregationParser(new PercentilesParser("percentiles")); assertThat( parse(parser, response), containsInAnyOrder( From cbcb25abaa947243e33094f2bd9d02b9d077cbee Mon Sep 17 00:00:00 2001 From: Yuanchun Shen Date: Fri, 26 Sep 2025 19:05:51 +0800 Subject: [PATCH 04/26] Fix unit tests Signed-off-by: Yuanchun Shen --- .../opensearch/request/AggregateAnalyzer.java | 1 + .../response/agg/BucketAggregationParser.java | 3 ++- .../agg/LeafBucketAggregationParser.java | 24 ++++++++----------- .../request/AggregateAnalyzerTest.java | 12 +++++++--- 4 files changed, 22 insertions(+), 18 deletions(-) diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/request/AggregateAnalyzer.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/request/AggregateAnalyzer.java index a1dbb615f8e..4d47e3fade0 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/request/AggregateAnalyzer.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/request/AggregateAnalyzer.java @@ -204,6 +204,7 @@ public static Pair, OpenSearchAggregationResponseParser // Find group by fields derived from CASE functions and convert them to range queries List> groupsByCase = groupList.stream() + .filter(i -> project != null && i < project.getProjects().size()) .map(i -> Pair.of(i, project.getNamedProjects().get(i))) .filter( p -> diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/response/agg/BucketAggregationParser.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/response/agg/BucketAggregationParser.java index bab5a4e27fa..a8cf1145494 100644 --- 
a/opensearch/src/main/java/org/opensearch/sql/opensearch/response/agg/BucketAggregationParser.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/response/agg/BucketAggregationParser.java @@ -10,13 +10,14 @@ import java.util.List; import java.util.Map; import java.util.stream.Collectors; +import lombok.Getter; import org.opensearch.search.aggregations.Aggregation; import org.opensearch.search.aggregations.Aggregations; import org.opensearch.search.aggregations.bucket.MultiBucketsAggregation; import org.opensearch.search.aggregations.bucket.composite.CompositeAggregation; public class BucketAggregationParser implements OpenSearchAggregationResponseParser { - private final OpenSearchAggregationResponseParser subAggParser; + @Getter private final OpenSearchAggregationResponseParser subAggParser; public BucketAggregationParser(OpenSearchAggregationResponseParser subAggParser) { this.subAggParser = subAggParser; diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/response/agg/LeafBucketAggregationParser.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/response/agg/LeafBucketAggregationParser.java index 0ba384d8989..bc11833add8 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/response/agg/LeafBucketAggregationParser.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/response/agg/LeafBucketAggregationParser.java @@ -7,11 +7,13 @@ import java.util.Arrays; import java.util.HashMap; +import java.util.LinkedHashMap; import java.util.List; import java.util.Map; import java.util.Objects; import java.util.stream.Collectors; import lombok.EqualsAndHashCode; +import lombok.Getter; import org.opensearch.search.aggregations.Aggregation; import org.opensearch.search.aggregations.Aggregations; import org.opensearch.search.aggregations.bucket.MultiBucketsAggregation; @@ -24,7 +26,7 @@ */ @EqualsAndHashCode public class LeafBucketAggregationParser implements OpenSearchAggregationResponseParser { - private final 
MetricParserHelper metricsParser; + @Getter private final MetricParserHelper metricsParser; public LeafBucketAggregationParser(MetricParser... metricParserList) { metricsParser = new MetricParserHelper(Arrays.asList(metricParserList)); @@ -47,25 +49,19 @@ public List> parse(Aggregations aggregations) { private Map parse(MultiBucketsAggregation.Bucket bucket, String name) { if (bucket instanceof CompositeAggregation.Bucket compositeBucket) { return parse(compositeBucket); - } else if (bucket instanceof Range.Bucket rangeBucket) { - return parse(rangeBucket, name); } - return metricsParser.parse(bucket.getAggregations()); - } - - private Map parse(CompositeAggregation.Bucket bucket) { - Map resultMap = new HashMap<>(); - resultMap.putAll(bucket.getKey()); + if (bucket instanceof Range.Bucket && bucket.getDocCount() == 0) { + return null; + } + Map resultMap = new LinkedHashMap<>(); + resultMap.put(name, bucket.getKey()); resultMap.putAll(metricsParser.parse(bucket.getAggregations())); return resultMap; } - private Map parse(Range.Bucket bucket, String name) { - if (bucket.getDocCount() == 0) { - return null; - } + private Map parse(CompositeAggregation.Bucket bucket) { Map resultMap = new HashMap<>(); - resultMap.put(name, bucket.getKey()); + resultMap.putAll(bucket.getKey()); resultMap.putAll(metricsParser.parse(bucket.getAggregations())); return resultMap; } diff --git a/opensearch/src/test/java/org/opensearch/sql/opensearch/request/AggregateAnalyzerTest.java b/opensearch/src/test/java/org/opensearch/sql/opensearch/request/AggregateAnalyzerTest.java index e304f504242..3c8b928cb31 100644 --- a/opensearch/src/test/java/org/opensearch/sql/opensearch/request/AggregateAnalyzerTest.java +++ b/opensearch/src/test/java/org/opensearch/sql/opensearch/request/AggregateAnalyzerTest.java @@ -46,8 +46,9 @@ import org.opensearch.sql.opensearch.data.type.OpenSearchDataType; import org.opensearch.sql.opensearch.data.type.OpenSearchDataType.MappingType; import 
org.opensearch.sql.opensearch.request.AggregateAnalyzer.ExpressionNotAnalyzableException; -import org.opensearch.sql.opensearch.response.agg.CompositeAggregationParser; +import org.opensearch.sql.opensearch.response.agg.BucketAggregationParser; import org.opensearch.sql.opensearch.response.agg.FilterParser; +import org.opensearch.sql.opensearch.response.agg.LeafBucketAggregationParser; import org.opensearch.sql.opensearch.response.agg.MetricParserHelper; import org.opensearch.sql.opensearch.response.agg.NoBucketAggregationParser; import org.opensearch.sql.opensearch.response.agg.OpenSearchAggregationResponseParser; @@ -281,9 +282,11 @@ void analyze_groupBy() throws ExpressionNotAnalyzableException { + "{\"b\":{\"terms\":{\"field\":\"b.keyword\",\"missing_bucket\":true,\"missing_order\":\"first\",\"order\":\"asc\"}}}]}," + "\"aggregations\":{\"cnt\":{\"value_count\":{\"field\":\"_index\"}}}}}]", result.getLeft().toString()); - assertInstanceOf(CompositeAggregationParser.class, result.getRight()); + assertInstanceOf(BucketAggregationParser.class, result.getRight()); MetricParserHelper metricsParser = - ((CompositeAggregationParser) result.getRight()).getMetricsParser(); + ((LeafBucketAggregationParser) + ((BucketAggregationParser) result.getRight()).getSubAggParser()) + .getMetricsParser(); assertEquals(1, metricsParser.getMetricParserMap().size()); metricsParser .getMetricParserMap() @@ -592,8 +595,11 @@ private Project createMockProject(List refIndex) { when(ref.getType()).thenReturn(typeFactory.createSqlType(SqlTypeName.INTEGER)); rexNodes.add(ref); } + List> namedProjects = + rexNodes.stream().map(n -> org.apache.calcite.util.Pair.of(n, n.toString())).toList(); when(project.getProjects()).thenReturn(rexNodes); when(project.getRowType()).thenReturn(rowType); + when(project.getNamedProjects()).thenReturn(namedProjects); return project; } From 51813f0dcf669b6d17acc216de60f46af8407131 Mon Sep 17 00:00:00 2001 From: Yuanchun Shen Date: Sun, 28 Sep 2025 16:26:57 +0800 
Subject: [PATCH 05/26] Fix parsers to multi-range cases Signed-off-by: Yuanchun Shen --- .../opensearch/request/AggregateAnalyzer.java | 74 ++++++++++--------- .../opensearch/request/CaseRangeAnalyzer.java | 5 +- .../response/agg/BucketAggregationParser.java | 52 ++++++------- .../agg/LeafBucketAggregationParser.java | 31 ++++---- 4 files changed, 77 insertions(+), 85 deletions(-) diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/request/AggregateAnalyzer.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/request/AggregateAnalyzer.java index cce088d919b..3562fe970e7 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/request/AggregateAnalyzer.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/request/AggregateAnalyzer.java @@ -83,7 +83,6 @@ import org.opensearch.sql.opensearch.request.PredicateAnalyzer.NamedFieldExpression; import org.opensearch.sql.opensearch.response.agg.ArgMaxMinParser; import org.opensearch.sql.opensearch.response.agg.BucketAggregationParser; -import org.opensearch.sql.opensearch.response.agg.CompositeAggregationParser; import org.opensearch.sql.opensearch.response.agg.CountAsTotalHitsParser; import org.opensearch.sql.opensearch.response.agg.LeafBucketAggregationParser; import org.opensearch.sql.opensearch.response.agg.MetricParser; @@ -225,6 +224,23 @@ public static Pair, OpenSearchAggregationResponseParser .map(p -> Pair.of(p.getLeft(), p.getRight().get())) .toList(); + // TODO: Rename toRemove to express removing case() + // Remove groups that are converted to ranges from groupList + Set toRemove = groupsByCase.stream().map(Pair::getLeft).collect(Collectors.toSet()); + // The group-by list after removing CASE that can be converted to range queries + List filteredGroupList = + groupList.stream().filter(i -> !toRemove.contains(i)).toList(); + + // both count() and count(FIELD) can apply doc_count optimization in non-bucket aggregation, + // but only count() can apply doc_count 
optimization in bucket aggregation. + boolean countAllOnly = !filteredGroupList.isEmpty(); + Pair, Builder> pair = + removeCountAggregationBuilders(metricBuilder, countAllOnly); + List removedCountAggBuilders = pair.getLeft(); + Builder newMetricBuilder = pair.getRight(); + List countAggNameList = + removedCountAggBuilders.stream().map(ValuesSourceAggregationBuilder::getName).toList(); + // Cascade aggregations in such a way: // RangeAggregation // ...Any other range aggregations @@ -235,33 +251,20 @@ public static Pair, OpenSearchAggregationResponseParser BucketAggregationParser bucketAggregationParser = null; if (!groupsByCase.isEmpty()) { for (int i = 0; i < groupsByCase.size(); i++) { - Pair pair = groupsByCase.get(i); + Pair p = groupsByCase.get(i); if (i == 0) { - rangeAggregationBuilder = pair.getRight(); + rangeAggregationBuilder = p.getRight(); bucketAggregationParser = - new BucketAggregationParser(new LeafBucketAggregationParser(metricParsers)); + new BucketAggregationParser( + new LeafBucketAggregationParser(metricParsers, countAggNameList)); } else { - groupsByCase.get(i - 1).getRight().subAggregation(pair.getRight()); + groupsByCase.get(i - 1).getRight().subAggregation(p.getRight()); bucketAggregationParser = new BucketAggregationParser(bucketAggregationParser); } } - groupsByCase.getLast().getRight().subAggregations(metricBuilder); - } - // TODO: Rename toRemove to express removing case() - // Remove groups that are converted to ranges from groupList - Set toRemove = groupsByCase.stream().map(Pair::getLeft).collect(Collectors.toSet()); - // The group-by list after removing CASE that can be converted to range queries - List filteredGroupList = - groupList.stream().filter(i -> !toRemove.contains(i)).toList(); - - // both count() and count(FIELD) can apply doc_count optimization in non-bucket aggregation, - // but only count() can apply doc_count optimization in bucket aggregation. 
- boolean countAllOnly = !filteredGroupList.isEmpty(); - Pair, Builder> pair = - removeCountAggregationBuilders(metricBuilder, countAllOnly); - List removedCountAggBuilders = pair.getLeft(); - Builder newMetricBuilder = pair.getRight(); + groupsByCase.getLast().getRight().subAggregations(newMetricBuilder); + } boolean removedCountAggBuildersHaveSameField = removedCountAggBuilders.stream() @@ -272,23 +275,23 @@ public static Pair, OpenSearchAggregationResponseParser boolean allCountAggRemoved = removedCountAggBuilders.size() == metricBuilder.getAggregatorFactories().size(); - // The top-level query is a range query: stats avg() by range_field + // The top-level query is a range query: + // - stats avg() by range_field + // - stats count() by range_field + // - stats avg(), count() by range_field // RangeAgg // Metric if (!groupsByCase.isEmpty() && filteredGroupList.isEmpty()) { return Pair.of(List.of(rangeAggregationBuilder), bucketAggregationParser); } - // No parent composite aggregation or range aggregation is attached: stats count(), stats - // avg() + // No parent composite aggregation or range aggregation is attached: + // - stats count() + // - stats avg() // Metric else if (aggregate.getGroupSet().isEmpty() && filteredGroupList.isEmpty()) { if (allCountAggRemoved && removedCountAggBuildersHaveSameField) { // The optimization must require all count aggregations are removed, // and they have only one field name - List countAggNameList = - removedCountAggBuilders.stream() - .map(ValuesSourceAggregationBuilder::getName) - .toList(); return Pair.of( ImmutableList.copyOf(newMetricBuilder.getAggregatorFactories()), new CountAsTotalHitsParser(countAggNameList)); @@ -316,8 +319,8 @@ && isAutoDateSpan(project.getProjects().get(groupList.getFirst()))) { Collections.singletonList(bucketBuilder.subAggregations(metricBuilder)), new LeafBucketAggregationParser(metricParsers)); } - // It has both composite aggregation and range aggregation: stats avg() by range_field, - // 
non_range_field + // It has both composite aggregation and range aggregation: + // - stats avg() by range_field, non_range_field // CompositeAgg // RangeAgg // Metric @@ -331,8 +334,8 @@ else if (!groupsByCase.isEmpty()) { .size(AGGREGATION_BUCKET_SIZE)), new BucketAggregationParser(bucketAggregationParser)); } - // It does not have range aggregation, but has composite aggregation: stats avg() by - // non_range_field + // It does not have range aggregation, but has composite aggregation: + // - stats avg() by non_range_field // CompositeAgg // Metric else { @@ -349,18 +352,17 @@ else if (!groupsByCase.isEmpty()) { aggregationBuilder.subAggregations(metricBuilder); return Pair.of( Collections.singletonList(aggregationBuilder), - new CompositeAggregationParser(metricParsers)); + new BucketAggregationParser(new LeafBucketAggregationParser(metricParsers))); } // No need to register sub-factories if no aggregator factories left after removing all // ValueCountAggregationBuilder. if (!newMetricBuilder.getAggregatorFactories().isEmpty()) { aggregationBuilder.subAggregations(newMetricBuilder); } - List countAggNameList = - removedCountAggBuilders.stream().map(ValuesSourceAggregationBuilder::getName).toList(); return Pair.of( Collections.singletonList(aggregationBuilder), - new CompositeAggregationParser(metricParsers, countAggNameList)); + new BucketAggregationParser( + new LeafBucketAggregationParser(metricParsers, countAggNameList))); } } catch (Throwable e) { Throwables.throwIfInstanceOf(e, UnsupportedOperationException.class); diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/request/CaseRangeAnalyzer.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/request/CaseRangeAnalyzer.java index 921ccb649ed..926936e0143 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/request/CaseRangeAnalyzer.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/request/CaseRangeAnalyzer.java @@ -188,8 +188,9 @@ private void 
analyzeSearchCondition(RexCall searchCall, String key) { throwUnsupported("Range query must be performed on the same field"); } RexLiteral literal = (RexLiteral) searchCall.getOperands().getLast(); - Sarg sarg = literal.getValueAs(Sarg.class); - for (Object r : sarg.rangeSet.asRanges()) { + Sarg sarg = Objects.requireNonNull(literal.getValueAs(Sarg.class)); + for (Range r : sarg.rangeSet.asRanges()) { + @SuppressWarnings("unchecked") Range range = (Range) r; validateRange(range); if (!range.hasLowerBound() && range.hasUpperBound()) { diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/response/agg/BucketAggregationParser.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/response/agg/BucketAggregationParser.java index f331bbee8f1..568e517bf73 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/response/agg/BucketAggregationParser.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/response/agg/BucketAggregationParser.java @@ -5,7 +5,6 @@ package org.opensearch.sql.opensearch.response.agg; -import java.util.ArrayList; import java.util.HashMap; import java.util.List; import java.util.Map; @@ -16,6 +15,7 @@ import org.opensearch.search.aggregations.Aggregations; import org.opensearch.search.aggregations.bucket.MultiBucketsAggregation; import org.opensearch.search.aggregations.bucket.composite.CompositeAggregation; +import org.opensearch.search.aggregations.bucket.range.Range; public class BucketAggregationParser implements OpenSearchAggregationResponseParser { @Getter private final OpenSearchAggregationResponseParser subAggParser; @@ -27,20 +27,16 @@ public BucketAggregationParser(OpenSearchAggregationResponseParser subAggParser) @Override public List> parse(Aggregations aggregations) { if (subAggParser instanceof BucketAggregationParser) { - return aggregations.asList().stream() - .map( - aggregation -> { - if (aggregation instanceof CompositeAggregation) { - return (CompositeAggregation) aggregation; - } else 
{ - return (MultiBucketsAggregation) aggregation; - } - }) - .map(MultiBucketsAggregation::getBuckets) - .flatMap(List::stream) - .map(this::parse) - .flatMap(List::stream) - .collect(Collectors.toList()); + Aggregation aggregation = aggregations.asList().getFirst(); + if (!(aggregation instanceof MultiBucketsAggregation)) { + throw new IllegalStateException( + "BucketAggregationParser can only be used with MultiBucketsAggregation"); + } + return ((MultiBucketsAggregation) aggregation) + .getBuckets().stream() + .map(b -> parse(b, aggregation.getName())) + .flatMap(List::stream) + .collect(Collectors.toList()); } else if (subAggParser instanceof LeafBucketAggregationParser) { return subAggParser.parse(aggregations); } else { @@ -50,23 +46,17 @@ public List> parse(Aggregations aggregations) { } } - private List> parse(MultiBucketsAggregation.Bucket bucket) { - if (bucket instanceof CompositeAggregation.Bucket compositeBucket) { - return parse(compositeBucket); - } - List> results = new ArrayList<>(); - for (Aggregation subAgg : bucket.getAggregations()) { - var sub = (Aggregations) subAgg; - results.addAll(subAggParser.parse(sub)); - } - return results; - } - - private List> parse(CompositeAggregation.Bucket bucket) { - Map common = new HashMap<>(bucket.getKey()); + private List> parse(MultiBucketsAggregation.Bucket bucket, String name) { List> results = subAggParser.parse(bucket.getAggregations()); - for (Map r : results) { - r.putAll(common); + if (bucket instanceof CompositeAggregation.Bucket compositeBucket) { + Map common = new HashMap<>(compositeBucket.getKey()); + for (Map r : results) { + r.putAll(common); + } + } else if (bucket instanceof Range.Bucket) { + for (Map r : results) { + r.put(name, bucket.getKey()); + } } return results; } diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/response/agg/LeafBucketAggregationParser.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/response/agg/LeafBucketAggregationParser.java index 
15c552bfbd9..547c7adabec 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/response/agg/LeafBucketAggregationParser.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/response/agg/LeafBucketAggregationParser.java @@ -6,8 +6,6 @@ package org.opensearch.sql.opensearch.response.agg; import java.util.Arrays; -import java.util.HashMap; -import java.util.LinkedHashMap; import java.util.List; import java.util.Map; import java.util.Objects; @@ -28,6 +26,8 @@ @EqualsAndHashCode public class LeafBucketAggregationParser implements OpenSearchAggregationResponseParser { @Getter private final MetricParserHelper metricsParser; + // countAggNameList dedicated the list of count aggregations which are filled by doc_count + private List countAggNameList = List.of(); public LeafBucketAggregationParser(MetricParser... metricParserList) { metricsParser = new MetricParserHelper(Arrays.asList(metricParserList)); @@ -37,6 +37,13 @@ public LeafBucketAggregationParser(List metricParserList) { metricsParser = new MetricParserHelper(metricParserList); } + /** CompositeAggregationParser with count aggregation name list, used in v3 */ + public LeafBucketAggregationParser( + List metricParserList, List countAggNameList) { + metricsParser = new MetricParserHelper(metricParserList); + this.countAggNameList = countAggNameList; + } + @Override public List> parse(Aggregations aggregations) { Aggregation agg = aggregations.asList().getFirst(); @@ -54,22 +61,14 @@ public List> parse(SearchHits hits) { } private Map parse(MultiBucketsAggregation.Bucket bucket, String name) { + Map result = metricsParser.parse(bucket.getAggregations()); if (bucket instanceof CompositeAggregation.Bucket compositeBucket) { - return parse(compositeBucket); - } - if (bucket instanceof Range.Bucket && bucket.getDocCount() == 0) { + result.putAll(compositeBucket.getKey()); + } else if (bucket instanceof Range.Bucket && bucket.getDocCount() == 0) { return null; } - Map resultMap = new 
LinkedHashMap<>(); - resultMap.put(name, bucket.getKey()); - resultMap.putAll(metricsParser.parse(bucket.getAggregations())); - return resultMap; - } - - private Map parse(CompositeAggregation.Bucket bucket) { - Map resultMap = new HashMap<>(); - resultMap.putAll(bucket.getKey()); - resultMap.putAll(metricsParser.parse(bucket.getAggregations())); - return resultMap; + result.put(name, bucket.getKey()); + countAggNameList.forEach(n -> result.put(n, bucket.getDocCount())); + return result; } } From 373e82550495c0ab34a49e4e51096c937c360e8d Mon Sep 17 00:00:00 2001 From: Yuanchun Shen Date: Sun, 28 Sep 2025 16:52:02 +0800 Subject: [PATCH 06/26] Update leaf bucket parser Signed-off-by: Yuanchun Shen --- .../response/agg/BucketAggregationParser.java | 3 +-- .../response/agg/LeafBucketAggregationParser.java | 14 ++++++-------- 2 files changed, 7 insertions(+), 10 deletions(-) diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/response/agg/BucketAggregationParser.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/response/agg/BucketAggregationParser.java index 568e517bf73..6f98e6620a0 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/response/agg/BucketAggregationParser.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/response/agg/BucketAggregationParser.java @@ -8,7 +8,6 @@ import java.util.HashMap; import java.util.List; import java.util.Map; -import java.util.stream.Collectors; import lombok.Getter; import org.opensearch.search.SearchHits; import org.opensearch.search.aggregations.Aggregation; @@ -36,7 +35,7 @@ public List> parse(Aggregations aggregations) { .getBuckets().stream() .map(b -> parse(b, aggregation.getName())) .flatMap(List::stream) - .collect(Collectors.toList()); + .toList(); } else if (subAggParser instanceof LeafBucketAggregationParser) { return subAggParser.parse(aggregations); } else { diff --git 
a/opensearch/src/main/java/org/opensearch/sql/opensearch/response/agg/LeafBucketAggregationParser.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/response/agg/LeafBucketAggregationParser.java index 547c7adabec..ca6d6c0eb49 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/response/agg/LeafBucketAggregationParser.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/response/agg/LeafBucketAggregationParser.java @@ -9,7 +9,6 @@ import java.util.List; import java.util.Map; import java.util.Objects; -import java.util.stream.Collectors; import lombok.EqualsAndHashCode; import lombok.Getter; import org.opensearch.search.SearchHits; @@ -48,10 +47,7 @@ public LeafBucketAggregationParser( public List> parse(Aggregations aggregations) { Aggregation agg = aggregations.asList().getFirst(); return ((MultiBucketsAggregation) agg) - .getBuckets().stream() - .map(b -> parse(b, agg.getName())) - .filter(Objects::nonNull) - .collect(Collectors.toList()); + .getBuckets().stream().map(b -> parse(b, agg.getName())).filter(Objects::nonNull).toList(); } @Override @@ -64,10 +60,12 @@ private Map parse(MultiBucketsAggregation.Bucket bucket, String Map result = metricsParser.parse(bucket.getAggregations()); if (bucket instanceof CompositeAggregation.Bucket compositeBucket) { result.putAll(compositeBucket.getKey()); - } else if (bucket instanceof Range.Bucket && bucket.getDocCount() == 0) { - return null; + } else if (bucket instanceof Range.Bucket) { + if (bucket.getDocCount() == 0) { + return null; + } + result.put(name, bucket.getKey()); } - result.put(name, bucket.getKey()); countAggNameList.forEach(n -> result.put(n, bucket.getDocCount())); return result; } From f3830a51426381495175d874367fc283b0fe0789 Mon Sep 17 00:00:00 2001 From: Yuanchun Shen Date: Sun, 28 Sep 2025 17:11:48 +0800 Subject: [PATCH 07/26] Unit test case range analyzer Signed-off-by: Yuanchun Shen --- .../request/CaseRangeAnalyzerTest.java | 816 ++++++++++++++++++ 1 file 
changed, 816 insertions(+) create mode 100644 opensearch/src/test/java/org/opensearch/sql/opensearch/request/CaseRangeAnalyzerTest.java diff --git a/opensearch/src/test/java/org/opensearch/sql/opensearch/request/CaseRangeAnalyzerTest.java b/opensearch/src/test/java/org/opensearch/sql/opensearch/request/CaseRangeAnalyzerTest.java new file mode 100644 index 00000000000..0a27ef04801 --- /dev/null +++ b/opensearch/src/test/java/org/opensearch/sql/opensearch/request/CaseRangeAnalyzerTest.java @@ -0,0 +1,816 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.opensearch.request; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.junit.jupiter.api.Assertions.assertTrue; + +import com.google.common.collect.Range; +import com.google.common.collect.TreeRangeSet; +import java.math.BigDecimal; +import java.util.Arrays; +import java.util.Optional; +import org.apache.calcite.rel.type.RelDataType; +import org.apache.calcite.rel.type.RelDataTypeFactory; +import org.apache.calcite.rel.type.RelDataTypeSystem; +import org.apache.calcite.rex.RexBuilder; +import org.apache.calcite.rex.RexCall; +import org.apache.calcite.rex.RexInputRef; +import org.apache.calcite.rex.RexLiteral; +import org.apache.calcite.rex.RexNode; +import org.apache.calcite.rex.RexUnknownAs; +import org.apache.calcite.sql.fun.SqlStdOperatorTable; +import org.apache.calcite.sql.type.SqlTypeFactoryImpl; +import org.apache.calcite.sql.type.SqlTypeName; +import org.apache.calcite.util.Sarg; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.opensearch.search.aggregations.bucket.range.RangeAggregationBuilder; + +class CaseRangeAnalyzerTest { + + private RelDataTypeFactory typeFactory; + private 
RexBuilder rexBuilder; + private RelDataType rowType; + private RexInputRef fieldRef; + + @BeforeEach + void setUp() { + typeFactory = new SqlTypeFactoryImpl(RelDataTypeSystem.DEFAULT); + rexBuilder = new RexBuilder(typeFactory); + + // Create a row type with fields: age (INTEGER), name (VARCHAR) + rowType = + typeFactory + .builder() + .add("age", SqlTypeName.INTEGER) + .add("name", SqlTypeName.VARCHAR) + .build(); + + fieldRef = rexBuilder.makeInputRef(rowType.getFieldList().get(0).getType(), 0); // age field + } + + @Test + void testCreateCaseRangeAnalyzer() { + CaseRangeAnalyzer analyzer = CaseRangeAnalyzer.create("test_agg", rowType); + assertNotNull(analyzer); + } + + @Test + void testAnalyzeSimpleCaseExpression() { + // CASE + // WHEN age >= 18 THEN 'adult' + // WHEN age >= 13 THEN 'teen' + // ELSE 'child' + // END + + RexLiteral literal18 = rexBuilder.makeExactLiteral(BigDecimal.valueOf(18)); + RexLiteral literal13 = rexBuilder.makeExactLiteral(BigDecimal.valueOf(13)); + RexLiteral adultLiteral = rexBuilder.makeLiteral("adult"); + RexLiteral teenLiteral = rexBuilder.makeLiteral("teen"); + RexLiteral childLiteral = rexBuilder.makeLiteral("child"); + + RexCall condition1 = + (RexCall) + rexBuilder.makeCall(SqlStdOperatorTable.GREATER_THAN_OR_EQUAL, fieldRef, literal18); + RexCall condition2 = + (RexCall) + rexBuilder.makeCall(SqlStdOperatorTable.GREATER_THAN_OR_EQUAL, fieldRef, literal13); + + RexCall caseCall = + (RexCall) + rexBuilder.makeCall( + SqlStdOperatorTable.CASE, + Arrays.asList(condition1, adultLiteral, condition2, teenLiteral, childLiteral)); + + CaseRangeAnalyzer analyzer = CaseRangeAnalyzer.create("age_ranges", rowType); + Optional result = analyzer.analyze(caseCall); + + assertTrue(result.isPresent()); + RangeAggregationBuilder builder = result.get(); + assertEquals("age_ranges", builder.getName()); + assertEquals("age", builder.field()); + + String expectedJson = + """ + { + "age_ranges" : { + "range" : { + "field" : "age", + "ranges" : [ + { 
+ "key" : "adult", + "from" : 18.0 + }, + { + "key" : "teen", + "from" : 13.0, + "to" : 18.0 + }, + { + "key" : "child", + "to" : 13.0 + } + ], + "keyed" : true + } + } + }"""; + + assertEquals(normalizeJson(expectedJson), normalizeJson(builder.toString())); + } + + @Test + void testAnalyzeLessThanComparison() { + // CASE WHEN age < 18 THEN 'minor' ELSE 'adult' END + + RexLiteral literal18 = rexBuilder.makeExactLiteral(BigDecimal.valueOf(18)); + RexLiteral minorLiteral = rexBuilder.makeLiteral("minor"); + RexLiteral adultLiteral = rexBuilder.makeLiteral("adult"); + + RexCall condition = + (RexCall) rexBuilder.makeCall(SqlStdOperatorTable.LESS_THAN, fieldRef, literal18); + + RexCall caseCall = + (RexCall) + rexBuilder.makeCall( + SqlStdOperatorTable.CASE, Arrays.asList(condition, minorLiteral, adultLiteral)); + + CaseRangeAnalyzer analyzer = CaseRangeAnalyzer.create("age_check", rowType); + Optional result = analyzer.analyze(caseCall); + + assertTrue(result.isPresent()); + RangeAggregationBuilder builder = result.get(); + + String expectedJson = + """ + { + "age_check" : { + "range" : { + "field" : "age", + "ranges" : [ + { + "key" : "minor", + "to" : 18.0 + }, + { + "key" : "adult", + "from" : 18.0 + } + ], + "keyed" : true + } + } + }"""; + + assertEquals(normalizeJson(expectedJson), normalizeJson(builder.toString())); + } + + @Test + void testAnalyzeWithSearchCondition() { + // Create a SEARCH condition (Sarg-based range) + TreeRangeSet rangeSet = TreeRangeSet.create(); + rangeSet.add(Range.closedOpen(BigDecimal.valueOf(18), BigDecimal.valueOf(65))); + + Sarg sarg = Sarg.of(RexUnknownAs.UNKNOWN, rangeSet); + RexNode sargLiteral = + rexBuilder.makeSearchArgumentLiteral(sarg, typeFactory.createSqlType(SqlTypeName.DECIMAL)); + + RexCall searchCall = + (RexCall) + rexBuilder.makeCall(SqlStdOperatorTable.SEARCH, Arrays.asList(fieldRef, sargLiteral)); + + RexLiteral workingLiteral = rexBuilder.makeLiteral("working_age"); + RexLiteral otherLiteral = 
rexBuilder.makeLiteral("other"); + + RexCall caseCall = + (RexCall) + rexBuilder.makeCall( + SqlStdOperatorTable.CASE, Arrays.asList(searchCall, workingLiteral, otherLiteral)); + + CaseRangeAnalyzer analyzer = CaseRangeAnalyzer.create("age_groups", rowType); + Optional result = analyzer.analyze(caseCall); + + assertTrue(result.isPresent()); + RangeAggregationBuilder builder = result.get(); + + String expectedJson = + """ + { + "age_groups" : { + "range" : { + "field" : "age", + "ranges" : [ + { + "key" : "working_age", + "from" : 18.0, + "to" : 65.0 + }, + { + "key" : "other", + "to" : 18.0 + }, + { + "key" : "other", + "from" : 65.0 + } + ], + "keyed" : true + } + } + }"""; + + assertEquals(normalizeJson(expectedJson), normalizeJson(builder.toString())); + } + + @Test + void testAnalyzeWithNullElse() { + // CASE WHEN age >= 18 THEN 'adult' ELSE NULL END + + RexLiteral literal18 = rexBuilder.makeExactLiteral(BigDecimal.valueOf(18)); + RexLiteral adultLiteral = rexBuilder.makeLiteral("adult"); + RexLiteral nullLiteral = + rexBuilder.makeNullLiteral(typeFactory.createSqlType(SqlTypeName.VARCHAR)); + + RexCall condition = + (RexCall) + rexBuilder.makeCall(SqlStdOperatorTable.GREATER_THAN_OR_EQUAL, fieldRef, literal18); + + RexCall caseCall = + (RexCall) + rexBuilder.makeCall( + SqlStdOperatorTable.CASE, Arrays.asList(condition, adultLiteral, nullLiteral)); + + CaseRangeAnalyzer analyzer = CaseRangeAnalyzer.create("age_check", rowType); + Optional result = analyzer.analyze(caseCall); + + assertTrue(result.isPresent()); + RangeAggregationBuilder builder = result.get(); + // Should use DEFAULT_ELSE_KEY for null else clause + + String expectedJson = + """ + { + "age_check" : { + "range" : { + "field" : "age", + "ranges" : [ + { + "key" : "adult", + "from" : 18.0 + }, + { + "key" : "null", + "to" : 18.0 + } + ], + "keyed" : true + } + } + }"""; + + assertEquals(normalizeJson(expectedJson), normalizeJson(builder.toString())); + } + + @Test + void 
testAnalyzeNonCaseExpression() { + // Test with non-CASE expression - create a simple call that's not CASE + RexLiteral literal = rexBuilder.makeLiteral("test"); + RexCall nonCaseCall = (RexCall) rexBuilder.makeCall(SqlStdOperatorTable.UPPER, literal); + + CaseRangeAnalyzer analyzer = CaseRangeAnalyzer.create("test", rowType); + Optional result = analyzer.analyze(nonCaseCall); + + assertFalse(result.isPresent()); + } + + @Test + void testAnalyzeWithNonLiteralResult() { + // CASE WHEN age >= 18 THEN age ELSE 0 END (non-literal result) + + RexLiteral literal18 = rexBuilder.makeExactLiteral(BigDecimal.valueOf(18)); + RexLiteral zeroLiteral = rexBuilder.makeExactLiteral(BigDecimal.valueOf(0)); + + RexCall condition = + (RexCall) + rexBuilder.makeCall(SqlStdOperatorTable.GREATER_THAN_OR_EQUAL, fieldRef, literal18); + + RexCall caseCall = + (RexCall) + rexBuilder.makeCall( + SqlStdOperatorTable.CASE, + Arrays.asList(condition, fieldRef, zeroLiteral)); // fieldRef as result, not literal + + CaseRangeAnalyzer analyzer = CaseRangeAnalyzer.create("test", rowType); + Optional result = analyzer.analyze(caseCall); + + assertFalse(result.isPresent()); + } + + @Test + void testAnalyzeWithDifferentFields() { + // Test comparing different fields in conditions + RexInputRef nameFieldRef = rexBuilder.makeInputRef(rowType.getFieldList().get(1).getType(), 1); + + RexLiteral literal18 = rexBuilder.makeExactLiteral(BigDecimal.valueOf(18)); + RexLiteral literalName = rexBuilder.makeLiteral("John"); + RexLiteral result1 = rexBuilder.makeLiteral("result1"); + RexLiteral result2 = rexBuilder.makeLiteral("result2"); + RexLiteral elseResult = rexBuilder.makeLiteral("else"); + + RexCall condition1 = + (RexCall) + rexBuilder.makeCall( + SqlStdOperatorTable.GREATER_THAN_OR_EQUAL, fieldRef, literal18); // age >= 18 + RexCall condition2 = + (RexCall) + rexBuilder.makeCall( + SqlStdOperatorTable.EQUALS, nameFieldRef, literalName); // name = 'John' + + RexCall caseCall = + (RexCall) + 
rexBuilder.makeCall( + SqlStdOperatorTable.CASE, + Arrays.asList(condition1, result1, condition2, result2, elseResult)); + + CaseRangeAnalyzer analyzer = CaseRangeAnalyzer.create("test", rowType); + + assertThrows(UnsupportedOperationException.class, () -> analyzer.analyze(caseCall)); + } + + @Test + void testAnalyzeWithAndCondition() { + // Test AND condition which should be unsupported + RexLiteral literal18 = rexBuilder.makeExactLiteral(BigDecimal.valueOf(18)); + RexLiteral literal65 = rexBuilder.makeExactLiteral(BigDecimal.valueOf(65)); + RexLiteral resultLiteral = rexBuilder.makeLiteral("working_age"); + RexLiteral elseLiteral = rexBuilder.makeLiteral("other"); + + RexCall condition1 = + (RexCall) + rexBuilder.makeCall(SqlStdOperatorTable.GREATER_THAN_OR_EQUAL, fieldRef, literal18); + RexCall condition2 = + (RexCall) rexBuilder.makeCall(SqlStdOperatorTable.LESS_THAN, fieldRef, literal65); + RexCall andCondition = + (RexCall) rexBuilder.makeCall(SqlStdOperatorTable.AND, condition1, condition2); + + RexCall caseCall = + (RexCall) + rexBuilder.makeCall( + SqlStdOperatorTable.CASE, Arrays.asList(andCondition, resultLiteral, elseLiteral)); + + CaseRangeAnalyzer analyzer = CaseRangeAnalyzer.create("test", rowType); + + assertThrows(UnsupportedOperationException.class, () -> analyzer.analyze(caseCall)); + } + + @Test + void testAnalyzeWithOrCondition() { + // Test OR condition which should be unsupported + RexLiteral literal18 = rexBuilder.makeExactLiteral(BigDecimal.valueOf(18)); + RexLiteral literal65 = rexBuilder.makeExactLiteral(BigDecimal.valueOf(65)); + RexLiteral resultLiteral = rexBuilder.makeLiteral("age_group"); + RexLiteral elseLiteral = rexBuilder.makeLiteral("other"); + + RexCall condition1 = + (RexCall) rexBuilder.makeCall(SqlStdOperatorTable.LESS_THAN, fieldRef, literal18); + RexCall condition2 = + (RexCall) + rexBuilder.makeCall(SqlStdOperatorTable.GREATER_THAN_OR_EQUAL, fieldRef, literal65); + RexCall orCondition = + (RexCall) 
rexBuilder.makeCall(SqlStdOperatorTable.OR, condition1, condition2); + + RexCall caseCall = + (RexCall) + rexBuilder.makeCall( + SqlStdOperatorTable.CASE, Arrays.asList(orCondition, resultLiteral, elseLiteral)); + + CaseRangeAnalyzer analyzer = CaseRangeAnalyzer.create("test", rowType); + + assertThrows(UnsupportedOperationException.class, () -> analyzer.analyze(caseCall)); + } + + @Test + void testAnalyzeWithUnsupportedComparison() { + // Test GREATER_THAN which should be converted to supported operations or fail + RexLiteral literal18 = rexBuilder.makeExactLiteral(BigDecimal.valueOf(18)); + RexLiteral resultLiteral = rexBuilder.makeLiteral("adult"); + RexLiteral elseLiteral = rexBuilder.makeLiteral("minor"); + + RexCall condition = + (RexCall) + rexBuilder.makeCall( + SqlStdOperatorTable.GREATER_THAN, fieldRef, literal18); // This should fail + + RexCall caseCall = + (RexCall) + rexBuilder.makeCall( + SqlStdOperatorTable.CASE, Arrays.asList(condition, resultLiteral, elseLiteral)); + + CaseRangeAnalyzer analyzer = CaseRangeAnalyzer.create("test", rowType); + + assertThrows(UnsupportedOperationException.class, () -> analyzer.analyze(caseCall)); + } + + @Test + void testAnalyzeWithReversedComparison() { + // Test literal on left side: 18 <= age (should be converted to age >= 18) + RexLiteral literal18 = rexBuilder.makeExactLiteral(BigDecimal.valueOf(18)); + RexLiteral resultLiteral = rexBuilder.makeLiteral("adult"); + RexLiteral elseLiteral = rexBuilder.makeLiteral("minor"); + + RexCall condition = + (RexCall) + rexBuilder.makeCall( + SqlStdOperatorTable.LESS_THAN_OR_EQUAL, literal18, fieldRef); // 18 <= age + + RexCall caseCall = + (RexCall) + rexBuilder.makeCall( + SqlStdOperatorTable.CASE, Arrays.asList(condition, resultLiteral, elseLiteral)); + + CaseRangeAnalyzer analyzer = CaseRangeAnalyzer.create("reversed_test", rowType); + Optional result = analyzer.analyze(caseCall); + + assertTrue(result.isPresent()); + RangeAggregationBuilder builder = result.get(); + + 
String expectedJson = + """ + { + "reversed_test" : { + "range" : { + "field" : "age", + "ranges" : [ + { + "key" : "adult", + "from" : 18.0 + }, + { + "key" : "minor", + "to" : 18.0 + } + ], + "keyed" : true + } + } + }"""; + + assertEquals(normalizeJson(expectedJson), normalizeJson(builder.toString())); + } + + @Test + void testAnalyzeWithInvalidOperands() { + // Test condition with neither field reference nor literal + RexCall invalidCondition = + (RexCall) + rexBuilder.makeCall( + SqlStdOperatorTable.GREATER_THAN_OR_EQUAL, + fieldRef, + fieldRef); // both sides are field refs + + RexLiteral resultLiteral = rexBuilder.makeLiteral("result"); + RexLiteral elseLiteral = rexBuilder.makeLiteral("else"); + + RexCall caseCall = + (RexCall) + rexBuilder.makeCall( + SqlStdOperatorTable.CASE, + Arrays.asList(invalidCondition, resultLiteral, elseLiteral)); + + CaseRangeAnalyzer analyzer = CaseRangeAnalyzer.create("test", rowType); + + assertThrows(UnsupportedOperationException.class, () -> analyzer.analyze(caseCall)); + } + + @Test + void testAnalyzeWithNullLiteralValue() { + // Test with null literal value that can't be converted to Double + RexLiteral nullLiteral = + rexBuilder.makeNullLiteral(typeFactory.createSqlType(SqlTypeName.INTEGER)); + RexLiteral resultLiteral = rexBuilder.makeLiteral("result"); + RexLiteral elseLiteral = rexBuilder.makeLiteral("else"); + + RexCall condition = + (RexCall) + rexBuilder.makeCall(SqlStdOperatorTable.GREATER_THAN_OR_EQUAL, fieldRef, nullLiteral); + + RexCall caseCall = + (RexCall) + rexBuilder.makeCall( + SqlStdOperatorTable.CASE, Arrays.asList(condition, resultLiteral, elseLiteral)); + + CaseRangeAnalyzer analyzer = CaseRangeAnalyzer.create("test", rowType); + + assertThrows(UnsupportedOperationException.class, () -> analyzer.analyze(caseCall)); + } + + @Test + void testDefaultElseKey() { + assertEquals("null", CaseRangeAnalyzer.DEFAULT_ELSE_KEY); + } + + @Test + void testSimpleCaseGeneratesExpectedDSL() { + // CASE WHEN age >= 18 
THEN 'adult' ELSE 'minor' END + + RexLiteral literal18 = rexBuilder.makeExactLiteral(BigDecimal.valueOf(18)); + RexLiteral adultLiteral = rexBuilder.makeLiteral("adult"); + RexLiteral minorLiteral = rexBuilder.makeLiteral("minor"); + + RexCall condition = + (RexCall) + rexBuilder.makeCall(SqlStdOperatorTable.GREATER_THAN_OR_EQUAL, fieldRef, literal18); + + RexCall caseCall = + (RexCall) + rexBuilder.makeCall( + SqlStdOperatorTable.CASE, Arrays.asList(condition, adultLiteral, minorLiteral)); + + CaseRangeAnalyzer analyzer = CaseRangeAnalyzer.create("age_groups", rowType); + Optional result = analyzer.analyze(caseCall); + + assertTrue(result.isPresent()); + RangeAggregationBuilder builder = result.get(); + + String expectedJson = + """ + { + "age_groups" : { + "range" : { + "field" : "age", + "ranges" : [ + { + "key" : "adult", + "from" : 18.0 + }, + { + "key" : "minor", + "to" : 18.0 + } + ], + "keyed" : true + } + } + }"""; + + assertEquals(normalizeJson(expectedJson), normalizeJson(builder.toString())); + } + + @Test + void testMultipleConditionsGenerateExpectedDSL() { + // CASE + // WHEN age >= 65 THEN 'senior' + // WHEN age >= 18 THEN 'adult' + // ELSE 'minor' + // END + + RexLiteral literal65 = rexBuilder.makeExactLiteral(BigDecimal.valueOf(65)); + RexLiteral literal18 = rexBuilder.makeExactLiteral(BigDecimal.valueOf(18)); + RexLiteral seniorLiteral = rexBuilder.makeLiteral("senior"); + RexLiteral adultLiteral = rexBuilder.makeLiteral("adult"); + RexLiteral minorLiteral = rexBuilder.makeLiteral("minor"); + + RexCall condition1 = + (RexCall) + rexBuilder.makeCall(SqlStdOperatorTable.GREATER_THAN_OR_EQUAL, fieldRef, literal65); + RexCall condition2 = + (RexCall) + rexBuilder.makeCall(SqlStdOperatorTable.GREATER_THAN_OR_EQUAL, fieldRef, literal18); + + RexCall caseCall = + (RexCall) + rexBuilder.makeCall( + SqlStdOperatorTable.CASE, + Arrays.asList(condition1, seniorLiteral, condition2, adultLiteral, minorLiteral)); + + CaseRangeAnalyzer analyzer = 
CaseRangeAnalyzer.create("age_categories", rowType); + Optional result = analyzer.analyze(caseCall); + + assertTrue(result.isPresent()); + RangeAggregationBuilder builder = result.get(); + + String expectedJson = + """ + { + "age_categories" : { + "range" : { + "field" : "age", + "ranges" : [ + { + "key" : "senior", + "from" : 65.0 + }, + { + "key" : "adult", + "from" : 18.0, + "to" : 65.0 + }, + { + "key" : "minor", + "to" : 18.0 + } + ], + "keyed" : true + } + } + }"""; + + assertEquals(normalizeJson(expectedJson), normalizeJson(builder.toString())); + } + + @Test + void testLessThanConditionGeneratesExpectedDSL() { + // CASE WHEN age < 21 THEN 'underage' ELSE 'legal' END + + RexLiteral literal21 = rexBuilder.makeExactLiteral(BigDecimal.valueOf(21)); + RexLiteral underageLiteral = rexBuilder.makeLiteral("underage"); + RexLiteral legalLiteral = rexBuilder.makeLiteral("legal"); + + RexCall condition = + (RexCall) rexBuilder.makeCall(SqlStdOperatorTable.LESS_THAN, fieldRef, literal21); + + RexCall caseCall = + (RexCall) + rexBuilder.makeCall( + SqlStdOperatorTable.CASE, Arrays.asList(condition, underageLiteral, legalLiteral)); + + CaseRangeAnalyzer analyzer = CaseRangeAnalyzer.create("legal_status", rowType); + Optional result = analyzer.analyze(caseCall); + + assertTrue(result.isPresent()); + RangeAggregationBuilder builder = result.get(); + + String expectedJson = + """ + { + "legal_status" : { + "range" : { + "field" : "age", + "ranges" : [ + { + "key" : "underage", + "to" : 21.0 + }, + { + "key" : "legal", + "from" : 21.0 + } + ], + "keyed" : true + } + } + }"""; + + assertEquals(normalizeJson(expectedJson), normalizeJson(builder.toString())); + } + + @Test + void testNullElseClauseGeneratesExpectedDSL() { + // CASE WHEN age >= 18 THEN 'adult' ELSE NULL END + + RexLiteral literal18 = rexBuilder.makeExactLiteral(BigDecimal.valueOf(18)); + RexLiteral adultLiteral = rexBuilder.makeLiteral("adult"); + RexLiteral nullLiteral = + 
rexBuilder.makeNullLiteral(typeFactory.createSqlType(SqlTypeName.VARCHAR)); + + RexCall condition = + (RexCall) + rexBuilder.makeCall(SqlStdOperatorTable.GREATER_THAN_OR_EQUAL, fieldRef, literal18); + + RexCall caseCall = + (RexCall) + rexBuilder.makeCall( + SqlStdOperatorTable.CASE, Arrays.asList(condition, adultLiteral, nullLiteral)); + + CaseRangeAnalyzer analyzer = CaseRangeAnalyzer.create("adult_check", rowType); + Optional result = analyzer.analyze(caseCall); + + assertTrue(result.isPresent()); + RangeAggregationBuilder builder = result.get(); + + String expectedJson = + """ + { + "adult_check" : { + "range" : { + "field" : "age", + "ranges" : [ + { + "key" : "adult", + "from" : 18.0 + }, + { + "key" : "null", + "to" : 18.0 + } + ], + "keyed" : true + } + } + }"""; + + assertEquals(normalizeJson(expectedJson), normalizeJson(builder.toString())); + } + + @Test + void testSearchConditionGeneratesExpectedDSL() { + // Create a SEARCH condition (Sarg-based range): 18 <= age < 65 + TreeRangeSet rangeSet = TreeRangeSet.create(); + rangeSet.add(Range.closedOpen(BigDecimal.valueOf(18), BigDecimal.valueOf(65))); + + Sarg sarg = Sarg.of(RexUnknownAs.UNKNOWN, rangeSet); + RexNode sargLiteral = + rexBuilder.makeSearchArgumentLiteral(sarg, typeFactory.createSqlType(SqlTypeName.DECIMAL)); + + RexCall searchCall = + (RexCall) + rexBuilder.makeCall(SqlStdOperatorTable.SEARCH, Arrays.asList(fieldRef, sargLiteral)); + + RexLiteral workingLiteral = rexBuilder.makeLiteral("working_age"); + RexLiteral otherLiteral = rexBuilder.makeLiteral("other"); + + RexCall caseCall = + (RexCall) + rexBuilder.makeCall( + SqlStdOperatorTable.CASE, Arrays.asList(searchCall, workingLiteral, otherLiteral)); + + CaseRangeAnalyzer analyzer = CaseRangeAnalyzer.create("employment_status", rowType); + Optional result = analyzer.analyze(caseCall); + + assertTrue(result.isPresent()); + RangeAggregationBuilder builder = result.get(); + + String expectedJson = + """ + { + "employment_status" : { + "range" : 
{ + "field" : "age", + "ranges" : [ + { + "key" : "working_age", + "from" : 18.0, + "to" : 65.0 + }, + { + "key" : "other", + "to" : 18.0 + }, + { + "key" : "other", + "from" : 65.0 + } + ], + "keyed" : true + } + } + }"""; + + assertEquals(normalizeJson(expectedJson), normalizeJson(builder.toString())); + } + + /** + * Helper method to normalize JSON strings for comparison by removing extra whitespace and + * ensuring consistent formatting. + */ + private String normalizeJson(String json) { + return json.replaceAll("\\s+", " ").replaceAll("\\s*([{}\\[\\],:]?)\\s*", "$1").trim(); + } + + @Test + void testAnalyzeSearchConditionWithInvalidField() { + // Create a SEARCH condition with non-field reference + TreeRangeSet rangeSet = TreeRangeSet.create(); + rangeSet.add(Range.closedOpen(BigDecimal.valueOf(18), BigDecimal.valueOf(65))); + + Sarg sarg = Sarg.of(RexUnknownAs.UNKNOWN, rangeSet); + RexNode sargLiteral = + rexBuilder.makeSearchArgumentLiteral(sarg, typeFactory.createSqlType(SqlTypeName.DECIMAL)); + RexLiteral constantLiteral = rexBuilder.makeExactLiteral(BigDecimal.valueOf(42)); + + RexCall searchCall = + (RexCall) + rexBuilder.makeCall( + SqlStdOperatorTable.SEARCH, + Arrays.asList(constantLiteral, sargLiteral)); // constant instead of field + + RexLiteral resultLiteral = rexBuilder.makeLiteral("result"); + RexLiteral elseLiteral = rexBuilder.makeLiteral("else"); + + RexCall caseCall = + (RexCall) + rexBuilder.makeCall( + SqlStdOperatorTable.CASE, Arrays.asList(searchCall, resultLiteral, elseLiteral)); + + CaseRangeAnalyzer analyzer = CaseRangeAnalyzer.create("test", rowType); + + assertThrows(UnsupportedOperationException.class, () -> analyzer.analyze(caseCall)); + } +} From da5a9c53aaca64a849cdc610662e4b13aa798ca9 Mon Sep 17 00:00:00 2001 From: Yuanchun Shen Date: Sun, 28 Sep 2025 19:02:42 +0800 Subject: [PATCH 08/26] Add explain ITs for pushing down case in aggregations Signed-off-by: Yuanchun Shen --- .../sql/calcite/remote/CalciteExplainIT.java | 88 
+++++++++++++++++++ .../calcite/agg_case_cannot_push.yaml | 13 +++ .../agg_case_composite_cannot_push.yaml | 13 +++ .../agg_composite2_range_count_push.yaml | 9 ++ ...agg_composite2_range_range_count_push.yaml | 9 ++ .../agg_composite_range_metric_push.yaml | 9 ++ .../calcite/agg_range_count_push.yaml | 10 +++ .../agg_range_metric_complex_push.yaml | 10 +++ .../calcite/agg_range_metric_push.yaml | 10 +++ .../calcite/agg_range_range_metric_push.yaml | 10 +++ .../agg_case_cannot_push.yaml | 13 +++ .../agg_case_composite_cannot_push.yaml | 13 +++ .../agg_composite2_range_count_push.yaml | 13 +++ ...agg_composite2_range_range_count_push.yaml | 13 +++ .../agg_composite_range_metric_push.yaml | 13 +++ .../agg_range_count_push.yaml | 13 +++ .../agg_range_metric_complex_push.yaml | 13 +++ .../agg_range_metric_push.yaml | 13 +++ .../agg_range_range_metric_push.yaml | 13 +++ 19 files changed, 298 insertions(+) create mode 100644 integ-test/src/test/resources/expectedOutput/calcite/agg_case_cannot_push.yaml create mode 100644 integ-test/src/test/resources/expectedOutput/calcite/agg_case_composite_cannot_push.yaml create mode 100644 integ-test/src/test/resources/expectedOutput/calcite/agg_composite2_range_count_push.yaml create mode 100644 integ-test/src/test/resources/expectedOutput/calcite/agg_composite2_range_range_count_push.yaml create mode 100644 integ-test/src/test/resources/expectedOutput/calcite/agg_composite_range_metric_push.yaml create mode 100644 integ-test/src/test/resources/expectedOutput/calcite/agg_range_count_push.yaml create mode 100644 integ-test/src/test/resources/expectedOutput/calcite/agg_range_metric_complex_push.yaml create mode 100644 integ-test/src/test/resources/expectedOutput/calcite/agg_range_metric_push.yaml create mode 100644 integ-test/src/test/resources/expectedOutput/calcite/agg_range_range_metric_push.yaml create mode 100644 integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/agg_case_cannot_push.yaml create mode 100644 
integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/agg_case_composite_cannot_push.yaml create mode 100644 integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/agg_composite2_range_count_push.yaml create mode 100644 integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/agg_composite2_range_range_count_push.yaml create mode 100644 integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/agg_composite_range_metric_push.yaml create mode 100644 integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/agg_range_count_push.yaml create mode 100644 integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/agg_range_metric_complex_push.yaml create mode 100644 integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/agg_range_metric_push.yaml create mode 100644 integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/agg_range_range_metric_push.yaml diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java index 99ef4a8830c..eaa0833ce88 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java @@ -893,4 +893,92 @@ public void testExplainPushDownScriptsContainingUDT() throws IOException { + " span(t, 1d)", TEST_INDEX_BANK))); } + + @Test + public void testCasePushdownAsRangeQueryExplain() throws IOException { + // CASE 1: Range - Metric + // 1.1 Range - Metric + assertYamlEqualsJsonIgnoreId( + loadExpectedPlan("agg_range_metric_push.yaml"), + explainQueryToString( + String.format( + "source=%s | eval age_range = case(age < 30, 'u30', age < 40, 'u40' else 'u100') |" + + " stats avg(age) as avg_age by age_range", + TEST_INDEX_BANK))); + + // 1.2 Range - Metric (COUNT) + assertYamlEqualsJsonIgnoreId( + loadExpectedPlan("agg_range_count_push.yaml"), + 
explainQueryToString( + String.format( + "source=%s | eval age_range = case(age < 30, 'u30', age >= 30 and age < 40, 'u40'" + + " else 'u100') | stats avg(age) by age_range", + TEST_INDEX_BANK))); + + // 1.3 Range - Range - Metric + assertYamlEqualsJsonIgnoreId( + loadExpectedPlan("agg_range_range_metric_push.yaml"), + explainQueryToString( + String.format( + "source=%s | eval age_range = case(age < 30, 'u30', age < 40, 'u40' else 'u100')," + + " balance_range = case(balance < 20000, 'medium' else 'high') | stats" + + " avg(balance) as avg_balance by age_range, balance_range", + TEST_INDEX_BANK))); + + // 1.4 Range - Metric (With null & discontinuous ranges) + assertYamlEqualsJsonIgnoreId( + loadExpectedPlan("agg_range_metric_complex_push.yaml"), + explainQueryToString( + String.format( + "source=%s | eval age_range = case(age < 30, 'u30', (age >= 35 and age < 40) or age" + + " >= 80, '30-40 or >=80') | stats avg(balance) by age_range", + TEST_INDEX_BANK))); + + // 1.5 Should not be pushed because the range is not closed-open + assertYamlEqualsJsonIgnoreId( + loadExpectedPlan("agg_case_cannot_push.yaml"), + explainQueryToString( + String.format( + "source=%s | eval age_range = case(age < 30, 'u30', age >= 30 and age <= 40, 'u40'" + + " else 'u100') | stats avg(age) as avg_age by age_range", + TEST_INDEX_BANK))); + + // CASE 2: Composite - Range - Metric + // 2.1 Composite(1 field) - Range - Metric + assertYamlEqualsJsonIgnoreId( + loadExpectedPlan("agg_composite_range_metric_push.yaml"), + explainQueryToString( + String.format( + "source=%s | eval age_range = case(age < 30, 'u30' else 'a30') | stats avg(balance)" + + " by state, age_range", + TEST_INDEX_BANK))); + + // 2.2 Composite(2 fields) - Range - Metric (with count) + assertYamlEqualsJsonIgnoreId( + loadExpectedPlan("agg_composite2_range_count_push.yaml"), + explainQueryToString( + String.format( + "source=%s | eval age_range = case(age < 30, 'u30' else 'a30') | stats" + + " avg(balance), count() by 
age_range, state, gender", + TEST_INDEX_BANK))); + + // 2.3 Composite (2 fields) - Range - Range - Metric (with count) + assertYamlEqualsJsonIgnoreId( + loadExpectedPlan("agg_composite2_range_range_count_push.yaml"), + explainQueryToString( + String.format( + "source=%s | eval age_range = case(age < 35, 'u35' else 'a35'), balance_range =" + + " case(balance < 20000, 'medium' else 'high') | stats avg(balance) as" + + " avg_balance by age_range, balance_range, state", + TEST_INDEX_BANK))); + + // 2.4 Should not be pushed because case result expression is not constant + assertYamlEqualsJsonIgnoreId( + loadExpectedPlan("agg_case_composite_cannot_push.yaml"), + explainQueryToString( + String.format( + "source=%s | eval age_range = case(age < 35, 'u35' else email) | stats avg(balance)" + + " as avg_balance by age_range, state", + TEST_INDEX_BANK))); + } } diff --git a/integ-test/src/test/resources/expectedOutput/calcite/agg_case_cannot_push.yaml b/integ-test/src/test/resources/expectedOutput/calcite/agg_case_cannot_push.yaml new file mode 100644 index 00000000000..66e665267d3 --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite/agg_case_cannot_push.yaml @@ -0,0 +1,13 @@ +calcite: + logical: | + LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(avg_age=[$1], age_range=[$0]) + LogicalAggregate(group=[{0}], avg_age=[AVG($1)]) + LogicalProject(age_range=[CASE(<($10, 30), 'u30':VARCHAR, SEARCH($10, Sarg[[30..40]]), 'u40':VARCHAR, 'u100':VARCHAR)], age=[$10]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) + physical: | + EnumerableLimit(fetch=[10000]) + EnumerableCalc(expr#0..2=[{inputs}], expr#3=[0], expr#4=[=($t2, $t3)], expr#5=[null:BIGINT], expr#6=[CASE($t4, $t5, $t1)], expr#7=[CAST($t6):DOUBLE], expr#8=[/($t7, $t2)], avg_age=[$t8], age_range=[$t0]) + EnumerableAggregate(group=[{0}], agg#0=[$SUM0($1)], agg#1=[COUNT($1)]) + EnumerableCalc(expr#0=[{inputs}], expr#1=[30], expr#2=[<($t0, $t1)], 
expr#3=['u30':VARCHAR], expr#4=[Sarg[[30..40]]], expr#5=[SEARCH($t0, $t4)], expr#6=['u40':VARCHAR], expr#7=['u100':VARCHAR], expr#8=[CASE($t2, $t3, $t5, $t6, $t7)], $f0=[$t8], age=[$t0]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[PROJECT->[age]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"timeout":"1m","_source":{"includes":["age"],"excludes":[]}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/agg_case_composite_cannot_push.yaml b/integ-test/src/test/resources/expectedOutput/calcite/agg_case_composite_cannot_push.yaml new file mode 100644 index 00000000000..bd197c4f797 --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite/agg_case_composite_cannot_push.yaml @@ -0,0 +1,13 @@ +calcite: + logical: | + LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(avg_balance=[$2], age_range=[$0], state=[$1]) + LogicalAggregate(group=[{0, 1}], avg_balance=[AVG($2)]) + LogicalProject(age_range=[CASE(<($10, 35), 'u35':VARCHAR, $11)], state=[$9], balance=[$7]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) + physical: | + EnumerableLimit(fetch=[10000]) + EnumerableCalc(expr#0..3=[{inputs}], expr#4=[0], expr#5=[=($t3, $t4)], expr#6=[null:BIGINT], expr#7=[CASE($t5, $t6, $t2)], expr#8=[CAST($t7):DOUBLE], expr#9=[/($t8, $t3)], avg_balance=[$t9], age_range=[$t0], state=[$t1]) + EnumerableAggregate(group=[{0, 1}], agg#0=[$SUM0($2)], agg#1=[COUNT($2)]) + EnumerableCalc(expr#0..3=[{inputs}], expr#4=[35], expr#5=[<($t0, $t4)], expr#6=['u35':VARCHAR], expr#7=[CASE($t5, $t6, $t1)], $f0=[$t7], state=[$t2], balance=[$t3]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[PROJECT->[age, email, state, balance]], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"timeout":"1m","_source":{"includes":["age","email","state","balance"],"excludes":[]}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/agg_composite2_range_count_push.yaml b/integ-test/src/test/resources/expectedOutput/calcite/agg_composite2_range_count_push.yaml new file mode 100644 index 00000000000..353bcf5c1e9 --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite/agg_composite2_range_count_push.yaml @@ -0,0 +1,9 @@ +calcite: + logical: | + LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(avg(balance)=[$3], count()=[$4], age_range=[$0], state=[$1], gender=[$2]) + LogicalAggregate(group=[{0, 1, 2}], avg(balance)=[AVG($3)], count()=[COUNT()]) + LogicalProject(age_range=[CASE(<($10, 30), 'u30':VARCHAR, 'a30':VARCHAR)], state=[$9], gender=[$4], balance=[$7]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) + physical: | + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1, 2},avg(balance)=AVG($3),count()=COUNT()), PROJECT->[avg(balance), count(), age_range, state, gender], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"state":{"terms":{"field":"state.keyword","missing_bucket":true,"missing_order":"first","order":"asc"}}},{"gender":{"terms":{"field":"gender.keyword","missing_bucket":true,"missing_order":"first","order":"asc"}}}]},"aggregations":{"age_range":{"range":{"field":"age","ranges":[{"key":"u30","to":30.0},{"key":"a30","from":30.0}],"keyed":true},"aggregations":{"avg(balance)":{"avg":{"field":"balance"}}}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git 
a/integ-test/src/test/resources/expectedOutput/calcite/agg_composite2_range_range_count_push.yaml b/integ-test/src/test/resources/expectedOutput/calcite/agg_composite2_range_range_count_push.yaml new file mode 100644 index 00000000000..eef2a7b23f8 --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite/agg_composite2_range_range_count_push.yaml @@ -0,0 +1,9 @@ +calcite: + logical: | + LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(avg_balance=[$3], age_range=[$0], balance_range=[$1], state=[$2]) + LogicalAggregate(group=[{0, 1, 2}], avg_balance=[AVG($3)]) + LogicalProject(age_range=[CASE(<($10, 35), 'u35':VARCHAR, 'a35':VARCHAR)], balance_range=[CASE(<($7, 20000), 'medium':VARCHAR, 'high':VARCHAR)], state=[$9], balance=[$7]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) + physical: | + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1, 2},avg_balance=AVG($3)), PROJECT->[avg_balance, age_range, balance_range, state], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"state":{"terms":{"field":"state.keyword","missing_bucket":true,"missing_order":"first","order":"asc"}}}]},"aggregations":{"age_range":{"range":{"field":"age","ranges":[{"key":"u35","to":35.0},{"key":"a35","from":35.0}],"keyed":true},"aggregations":{"balance_range":{"range":{"field":"balance","ranges":[{"key":"medium","to":20000.0},{"key":"high","from":20000.0}],"keyed":true},"aggregations":{"avg_balance":{"avg":{"field":"balance"}}}}}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/agg_composite_range_metric_push.yaml b/integ-test/src/test/resources/expectedOutput/calcite/agg_composite_range_metric_push.yaml new 
file mode 100644 index 00000000000..065598bc82c --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite/agg_composite_range_metric_push.yaml @@ -0,0 +1,9 @@ +calcite: + logical: | + LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(avg(balance)=[$2], state=[$0], age_range=[$1]) + LogicalAggregate(group=[{0, 1}], avg(balance)=[AVG($2)]) + LogicalProject(state=[$9], age_range=[CASE(<($10, 30), 'u30':VARCHAR, 'a30':VARCHAR)], balance=[$7]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) + physical: | + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1},avg(balance)=AVG($2)), PROJECT->[avg(balance), state, age_range], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"state":{"terms":{"field":"state.keyword","missing_bucket":true,"missing_order":"first","order":"asc"}}}]},"aggregations":{"age_range":{"range":{"field":"age","ranges":[{"key":"u30","to":30.0},{"key":"a30","from":30.0}],"keyed":true},"aggregations":{"avg(balance)":{"avg":{"field":"balance"}}}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/agg_range_count_push.yaml b/integ-test/src/test/resources/expectedOutput/calcite/agg_range_count_push.yaml new file mode 100644 index 00000000000..498786a6aef --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite/agg_range_count_push.yaml @@ -0,0 +1,10 @@ +calcite: + logical: | + LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(avg(age)=[$1], age_range=[$0]) + LogicalAggregate(group=[{0}], avg(age)=[AVG($1)]) + LogicalProject(age_range=[CASE(<($10, 30), 'u30':VARCHAR, SEARCH($10, Sarg[[30..40)]), 'u40':VARCHAR, 'u100':VARCHAR)], age=[$10]) 
+ CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) + physical: | + EnumerableLimit(fetch=[10000]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},avg(age)=AVG($1)), PROJECT->[avg(age), age_range]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"age_range":{"range":{"field":"age","ranges":[{"key":"u30","to":30.0},{"key":"u40","from":30.0,"to":40.0},{"key":"u100","from":40.0}],"keyed":true},"aggregations":{"avg(age)":{"avg":{"field":"age"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/agg_range_metric_complex_push.yaml b/integ-test/src/test/resources/expectedOutput/calcite/agg_range_metric_complex_push.yaml new file mode 100644 index 00000000000..f3d749487c0 --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite/agg_range_metric_complex_push.yaml @@ -0,0 +1,10 @@ +calcite: + logical: | + LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(avg(balance)=[$1], age_range=[$0]) + LogicalAggregate(group=[{0}], avg(balance)=[AVG($1)]) + LogicalProject(age_range=[CASE(<($10, 30), 'u30':VARCHAR, SEARCH($10, Sarg[[35..40), [80..+∞)]), '30-40 or >=80':VARCHAR, null:NULL)], balance=[$7]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) + physical: | + EnumerableLimit(fetch=[10000]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},avg(balance)=AVG($1)), PROJECT->[avg(balance), age_range]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"age_range":{"range":{"field":"age","ranges":[{"key":"u30","to":30.0},{"key":"30-40 or 
>=80","from":35.0,"to":40.0},{"key":"30-40 or >=80","from":80.0},{"key":"null","from":30.0,"to":35.0},{"key":"null","from":40.0,"to":80.0}],"keyed":true},"aggregations":{"avg(balance)":{"avg":{"field":"balance"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/agg_range_metric_push.yaml b/integ-test/src/test/resources/expectedOutput/calcite/agg_range_metric_push.yaml new file mode 100644 index 00000000000..ee0a5ce9448 --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite/agg_range_metric_push.yaml @@ -0,0 +1,10 @@ +calcite: + logical: | + LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(avg_age=[$1], age_range=[$0]) + LogicalAggregate(group=[{0}], avg_age=[AVG($1)]) + LogicalProject(age_range=[CASE(<($10, 30), 'u30':VARCHAR, <($10, 40), 'u40':VARCHAR, 'u100':VARCHAR)], age=[$10]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) + physical: | + EnumerableLimit(fetch=[10000]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},avg_age=AVG($1)), PROJECT->[avg_age, age_range]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"age_range":{"range":{"field":"age","ranges":[{"key":"u30","to":30.0},{"key":"u40","from":30.0,"to":40.0},{"key":"u100","from":40.0}],"keyed":true},"aggregations":{"avg_age":{"avg":{"field":"age"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/agg_range_range_metric_push.yaml b/integ-test/src/test/resources/expectedOutput/calcite/agg_range_range_metric_push.yaml new file mode 100644 index 00000000000..5b44ebfdc68 --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite/agg_range_range_metric_push.yaml @@ -0,0 +1,10 @@ +calcite: + 
logical: | + LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(avg_balance=[$2], age_range=[$0], balance_range=[$1]) + LogicalAggregate(group=[{0, 1}], avg_balance=[AVG($2)]) + LogicalProject(age_range=[CASE(<($10, 30), 'u30':VARCHAR, <($10, 40), 'u40':VARCHAR, 'u100':VARCHAR)], balance_range=[CASE(<($7, 20000), 'medium':VARCHAR, 'high':VARCHAR)], balance=[$7]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) + physical: | + EnumerableLimit(fetch=[10000]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1},avg_balance=AVG($2)), PROJECT->[avg_balance, age_range, balance_range]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"age_range":{"range":{"field":"age","ranges":[{"key":"u30","to":30.0},{"key":"u40","from":30.0,"to":40.0},{"key":"u100","from":40.0}],"keyed":true},"aggregations":{"balance_range":{"range":{"field":"balance","ranges":[{"key":"medium","to":20000.0},{"key":"high","from":20000.0}],"keyed":true},"aggregations":{"avg_balance":{"avg":{"field":"balance"}}}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/agg_case_cannot_push.yaml b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/agg_case_cannot_push.yaml new file mode 100644 index 00000000000..f8fd80e1598 --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/agg_case_cannot_push.yaml @@ -0,0 +1,13 @@ +calcite: + logical: | + LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(avg_age=[$1], age_range=[$0]) + LogicalAggregate(group=[{0}], avg_age=[AVG($1)]) + LogicalProject(age_range=[CASE(<($10, 30), 'u30':VARCHAR, SEARCH($10, Sarg[[30..40]]), 'u40':VARCHAR, 'u100':VARCHAR)], age=[$10]) + 
CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) + physical: | + EnumerableLimit(fetch=[10000]) + EnumerableCalc(expr#0..2=[{inputs}], expr#3=[0], expr#4=[=($t2, $t3)], expr#5=[null:BIGINT], expr#6=[CASE($t4, $t5, $t1)], expr#7=[CAST($t6):DOUBLE], expr#8=[/($t7, $t2)], avg_age=[$t8], age_range=[$t0]) + EnumerableAggregate(group=[{0}], agg#0=[$SUM0($1)], agg#1=[COUNT($1)]) + EnumerableCalc(expr#0..18=[{inputs}], expr#19=[30], expr#20=[<($t10, $t19)], expr#21=['u30':VARCHAR], expr#22=[Sarg[[30..40]]], expr#23=[SEARCH($t10, $t22)], expr#24=['u40':VARCHAR], expr#25=['u100':VARCHAR], expr#26=[CASE($t20, $t21, $t23, $t24, $t25)], age_range=[$t26], age=[$t10]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/agg_case_composite_cannot_push.yaml b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/agg_case_composite_cannot_push.yaml new file mode 100644 index 00000000000..059caa2e2d2 --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/agg_case_composite_cannot_push.yaml @@ -0,0 +1,13 @@ +calcite: + logical: | + LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(avg_balance=[$2], age_range=[$0], state=[$1]) + LogicalAggregate(group=[{0, 1}], avg_balance=[AVG($2)]) + LogicalProject(age_range=[CASE(<($10, 35), 'u35':VARCHAR, $11)], state=[$9], balance=[$7]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) + physical: | + EnumerableLimit(fetch=[10000]) + EnumerableCalc(expr#0..3=[{inputs}], expr#4=[0], expr#5=[=($t3, $t4)], expr#6=[null:BIGINT], expr#7=[CASE($t5, $t6, $t2)], expr#8=[CAST($t7):DOUBLE], expr#9=[/($t8, $t3)], avg_balance=[$t9], age_range=[$t0], state=[$t1]) + EnumerableAggregate(group=[{0, 1}], agg#0=[$SUM0($2)], agg#1=[COUNT($2)]) + EnumerableCalc(expr#0..18=[{inputs}], expr#19=[35], expr#20=[<($t10, $t19)], 
expr#21=['u35':VARCHAR], expr#22=[CASE($t20, $t21, $t11)], age_range=[$t22], state=[$t9], balance=[$t7]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/agg_composite2_range_count_push.yaml b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/agg_composite2_range_count_push.yaml new file mode 100644 index 00000000000..43e27cd2d5d --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/agg_composite2_range_count_push.yaml @@ -0,0 +1,13 @@ +calcite: + logical: | + LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(avg(balance)=[$3], count()=[$4], age_range=[$0], state=[$1], gender=[$2]) + LogicalAggregate(group=[{0, 1, 2}], avg(balance)=[AVG($3)], count()=[COUNT()]) + LogicalProject(age_range=[CASE(<($10, 30), 'u30':VARCHAR, 'a30':VARCHAR)], state=[$9], gender=[$4], balance=[$7]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) + physical: | + EnumerableLimit(fetch=[10000]) + EnumerableCalc(expr#0..5=[{inputs}], expr#6=[0], expr#7=[=($t4, $t6)], expr#8=[null:BIGINT], expr#9=[CASE($t7, $t8, $t3)], expr#10=[CAST($t9):DOUBLE], expr#11=[/($t10, $t4)], avg(balance)=[$t11], count()=[$t5], age_range=[$t0], state=[$t1], gender=[$t2]) + EnumerableAggregate(group=[{0, 1, 2}], agg#0=[$SUM0($3)], agg#1=[COUNT($3)], count()=[COUNT()]) + EnumerableCalc(expr#0..18=[{inputs}], expr#19=[30], expr#20=[<($t10, $t19)], expr#21=['u30':VARCHAR], expr#22=['a30':VARCHAR], expr#23=[CASE($t20, $t21, $t22)], age_range=[$t23], state=[$t9], gender=[$t4], balance=[$t7]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/agg_composite2_range_range_count_push.yaml b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/agg_composite2_range_range_count_push.yaml new file 
mode 100644 index 00000000000..6dfa7cd65a3 --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/agg_composite2_range_range_count_push.yaml @@ -0,0 +1,13 @@ +calcite: + logical: | + LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(avg_balance=[$3], age_range=[$0], balance_range=[$1], state=[$2]) + LogicalAggregate(group=[{0, 1, 2}], avg_balance=[AVG($3)]) + LogicalProject(age_range=[CASE(<($10, 35), 'u35':VARCHAR, 'a35':VARCHAR)], balance_range=[CASE(<($7, 20000), 'medium':VARCHAR, 'high':VARCHAR)], state=[$9], balance=[$7]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) + physical: | + EnumerableLimit(fetch=[10000]) + EnumerableCalc(expr#0..4=[{inputs}], expr#5=[0], expr#6=[=($t4, $t5)], expr#7=[null:BIGINT], expr#8=[CASE($t6, $t7, $t3)], expr#9=[CAST($t8):DOUBLE], expr#10=[/($t9, $t4)], avg_balance=[$t10], age_range=[$t0], balance_range=[$t1], state=[$t2]) + EnumerableAggregate(group=[{0, 1, 2}], agg#0=[$SUM0($3)], agg#1=[COUNT($3)]) + EnumerableCalc(expr#0..18=[{inputs}], expr#19=[35], expr#20=[<($t10, $t19)], expr#21=['u35':VARCHAR], expr#22=['a35':VARCHAR], expr#23=[CASE($t20, $t21, $t22)], expr#24=[20000], expr#25=[<($t7, $t24)], expr#26=['medium':VARCHAR], expr#27=['high':VARCHAR], expr#28=[CASE($t25, $t26, $t27)], age_range=[$t23], balance_range=[$t28], state=[$t9], balance=[$t7]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/agg_composite_range_metric_push.yaml b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/agg_composite_range_metric_push.yaml new file mode 100644 index 00000000000..41ed8ba61fc --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/agg_composite_range_metric_push.yaml @@ -0,0 +1,13 @@ +calcite: + logical: | + LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) + 
LogicalProject(avg(balance)=[$2], state=[$0], age_range=[$1]) + LogicalAggregate(group=[{0, 1}], avg(balance)=[AVG($2)]) + LogicalProject(state=[$9], age_range=[CASE(<($10, 30), 'u30':VARCHAR, 'a30':VARCHAR)], balance=[$7]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) + physical: | + EnumerableLimit(fetch=[10000]) + EnumerableCalc(expr#0..3=[{inputs}], expr#4=[0], expr#5=[=($t3, $t4)], expr#6=[null:BIGINT], expr#7=[CASE($t5, $t6, $t2)], expr#8=[CAST($t7):DOUBLE], expr#9=[/($t8, $t3)], avg(balance)=[$t9], state=[$t0], age_range=[$t1]) + EnumerableAggregate(group=[{0, 1}], agg#0=[$SUM0($2)], agg#1=[COUNT($2)]) + EnumerableCalc(expr#0..18=[{inputs}], expr#19=[30], expr#20=[<($t10, $t19)], expr#21=['u30':VARCHAR], expr#22=['a30':VARCHAR], expr#23=[CASE($t20, $t21, $t22)], state=[$t9], age_range=[$t23], balance=[$t7]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/agg_range_count_push.yaml b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/agg_range_count_push.yaml new file mode 100644 index 00000000000..67ad0f0fd07 --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/agg_range_count_push.yaml @@ -0,0 +1,13 @@ +calcite: + logical: | + LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(avg(age)=[$1], age_range=[$0]) + LogicalAggregate(group=[{0}], avg(age)=[AVG($1)]) + LogicalProject(age_range=[CASE(<($10, 30), 'u30':VARCHAR, SEARCH($10, Sarg[[30..40)]), 'u40':VARCHAR, 'u100':VARCHAR)], age=[$10]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) + physical: | + EnumerableLimit(fetch=[10000]) + EnumerableCalc(expr#0..2=[{inputs}], expr#3=[0], expr#4=[=($t2, $t3)], expr#5=[null:BIGINT], expr#6=[CASE($t4, $t5, $t1)], expr#7=[CAST($t6):DOUBLE], expr#8=[/($t7, $t2)], avg(age)=[$t8], age_range=[$t0]) + 
EnumerableAggregate(group=[{0}], agg#0=[$SUM0($1)], agg#1=[COUNT($1)]) + EnumerableCalc(expr#0..18=[{inputs}], expr#19=[30], expr#20=[<($t10, $t19)], expr#21=['u30':VARCHAR], expr#22=[Sarg[[30..40)]], expr#23=[SEARCH($t10, $t22)], expr#24=['u40':VARCHAR], expr#25=['u100':VARCHAR], expr#26=[CASE($t20, $t21, $t23, $t24, $t25)], age_range=[$t26], age=[$t10]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/agg_range_metric_complex_push.yaml b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/agg_range_metric_complex_push.yaml new file mode 100644 index 00000000000..10ead7ad449 --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/agg_range_metric_complex_push.yaml @@ -0,0 +1,13 @@ +calcite: + logical: | + LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(avg(balance)=[$1], age_range=[$0]) + LogicalAggregate(group=[{0}], avg(balance)=[AVG($1)]) + LogicalProject(age_range=[CASE(<($10, 30), 'u30':VARCHAR, SEARCH($10, Sarg[[35..40), [80..+∞)]), '30-40 or >=80':VARCHAR, null:NULL)], balance=[$7]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) + physical: | + EnumerableLimit(fetch=[10000]) + EnumerableCalc(expr#0..2=[{inputs}], expr#3=[0], expr#4=[=($t2, $t3)], expr#5=[null:BIGINT], expr#6=[CASE($t4, $t5, $t1)], expr#7=[CAST($t6):DOUBLE], expr#8=[/($t7, $t2)], avg(balance)=[$t8], age_range=[$t0]) + EnumerableAggregate(group=[{0}], agg#0=[$SUM0($1)], agg#1=[COUNT($1)]) + EnumerableCalc(expr#0..18=[{inputs}], expr#19=[30], expr#20=[<($t10, $t19)], expr#21=['u30':VARCHAR], expr#22=[Sarg[[35..40), [80..+∞)]], expr#23=[SEARCH($t10, $t22)], expr#24=['30-40 or >=80':VARCHAR], expr#25=[null:NULL], expr#26=[CASE($t20, $t21, $t23, $t24, $t25)], age_range=[$t26], balance=[$t7]) + CalciteEnumerableIndexScan(table=[[OpenSearch, 
opensearch-sql_test_index_bank]]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/agg_range_metric_push.yaml b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/agg_range_metric_push.yaml new file mode 100644 index 00000000000..a81e208bdbf --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/agg_range_metric_push.yaml @@ -0,0 +1,13 @@ +calcite: + logical: | + LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(avg_age=[$1], age_range=[$0]) + LogicalAggregate(group=[{0}], avg_age=[AVG($1)]) + LogicalProject(age_range=[CASE(<($10, 30), 'u30':VARCHAR, <($10, 40), 'u40':VARCHAR, 'u100':VARCHAR)], age=[$10]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) + physical: | + EnumerableLimit(fetch=[10000]) + EnumerableCalc(expr#0..2=[{inputs}], expr#3=[0], expr#4=[=($t2, $t3)], expr#5=[null:BIGINT], expr#6=[CASE($t4, $t5, $t1)], expr#7=[CAST($t6):DOUBLE], expr#8=[/($t7, $t2)], avg_age=[$t8], age_range=[$t0]) + EnumerableAggregate(group=[{0}], agg#0=[$SUM0($1)], agg#1=[COUNT($1)]) + EnumerableCalc(expr#0..18=[{inputs}], expr#19=[30], expr#20=[<($t10, $t19)], expr#21=['u30':VARCHAR], expr#22=[40], expr#23=[<($t10, $t22)], expr#24=['u40':VARCHAR], expr#25=['u100':VARCHAR], expr#26=[CASE($t20, $t21, $t23, $t24, $t25)], age_range=[$t26], age=[$t10]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/agg_range_range_metric_push.yaml b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/agg_range_range_metric_push.yaml new file mode 100644 index 00000000000..404726f6083 --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/agg_range_range_metric_push.yaml @@ -0,0 +1,13 @@ +calcite: + logical: | + LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) + 
LogicalProject(avg_balance=[$2], age_range=[$0], balance_range=[$1]) + LogicalAggregate(group=[{0, 1}], avg_balance=[AVG($2)]) + LogicalProject(age_range=[CASE(<($10, 30), 'u30':VARCHAR, <($10, 40), 'u40':VARCHAR, 'u100':VARCHAR)], balance_range=[CASE(<($7, 20000), 'medium':VARCHAR, 'high':VARCHAR)], balance=[$7]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) + physical: | + EnumerableLimit(fetch=[10000]) + EnumerableCalc(expr#0..3=[{inputs}], expr#4=[0], expr#5=[=($t3, $t4)], expr#6=[null:BIGINT], expr#7=[CASE($t5, $t6, $t2)], expr#8=[CAST($t7):DOUBLE], expr#9=[/($t8, $t3)], avg_balance=[$t9], age_range=[$t0], balance_range=[$t1]) + EnumerableAggregate(group=[{0, 1}], agg#0=[$SUM0($2)], agg#1=[COUNT($2)]) + EnumerableCalc(expr#0..18=[{inputs}], expr#19=[30], expr#20=[<($t10, $t19)], expr#21=['u30':VARCHAR], expr#22=[40], expr#23=[<($t10, $t22)], expr#24=['u40':VARCHAR], expr#25=['u100':VARCHAR], expr#26=[CASE($t20, $t21, $t23, $t24, $t25)], expr#27=[20000], expr#28=[<($t7, $t27)], expr#29=['medium':VARCHAR], expr#30=['high':VARCHAR], expr#31=[CASE($t28, $t29, $t30)], age_range=[$t26], balance_range=[$t31], balance=[$t7]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) \ No newline at end of file From b46dd952adfd6f9b8be94c431e08a17dd7feff56 Mon Sep 17 00:00:00 2001 From: Yuanchun Shen Date: Sun, 28 Sep 2025 19:10:55 +0800 Subject: [PATCH 09/26] Update CaseRangeAnalyzerTest Signed-off-by: Yuanchun Shen --- .../request/CaseRangeAnalyzerTest.java | 127 +++++++++++------- 1 file changed, 75 insertions(+), 52 deletions(-) diff --git a/opensearch/src/test/java/org/opensearch/sql/opensearch/request/CaseRangeAnalyzerTest.java b/opensearch/src/test/java/org/opensearch/sql/opensearch/request/CaseRangeAnalyzerTest.java index 0a27ef04801..43af091b367 100644 --- a/opensearch/src/test/java/org/opensearch/sql/opensearch/request/CaseRangeAnalyzerTest.java +++ 
b/opensearch/src/test/java/org/opensearch/sql/opensearch/request/CaseRangeAnalyzerTest.java @@ -7,7 +7,6 @@ import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertFalse; -import static org.junit.jupiter.api.Assertions.assertNotNull; import static org.junit.jupiter.api.Assertions.assertThrows; import static org.junit.jupiter.api.Assertions.assertTrue; @@ -56,12 +55,6 @@ void setUp() { fieldRef = rexBuilder.makeInputRef(rowType.getFieldList().get(0).getType(), 0); // age field } - @Test - void testCreateCaseRangeAnalyzer() { - CaseRangeAnalyzer analyzer = CaseRangeAnalyzer.create("test_agg", rowType); - assertNotNull(analyzer); - } - @Test void testAnalyzeSimpleCaseExpression() { // CASE @@ -279,19 +272,7 @@ void testAnalyzeWithNullElse() { } @Test - void testAnalyzeNonCaseExpression() { - // Test with non-CASE expression - create a simple call that's not CASE - RexLiteral literal = rexBuilder.makeLiteral("test"); - RexCall nonCaseCall = (RexCall) rexBuilder.makeCall(SqlStdOperatorTable.UPPER, literal); - - CaseRangeAnalyzer analyzer = CaseRangeAnalyzer.create("test", rowType); - Optional result = analyzer.analyze(nonCaseCall); - - assertFalse(result.isPresent()); - } - - @Test - void testAnalyzeWithNonLiteralResult() { + void testAnalyzeWithNonLiteralResultShouldNotSucceed() { // CASE WHEN age >= 18 THEN age ELSE 0 END (non-literal result) RexLiteral literal18 = rexBuilder.makeExactLiteral(BigDecimal.valueOf(18)); @@ -314,7 +295,7 @@ void testAnalyzeWithNonLiteralResult() { } @Test - void testAnalyzeWithDifferentFields() { + void testAnalyzeDifferentFieldsShouldThrow() { // Test comparing different fields in conditions RexInputRef nameFieldRef = rexBuilder.makeInputRef(rowType.getFieldList().get(1).getType(), 1); @@ -345,7 +326,7 @@ void testAnalyzeWithDifferentFields() { } @Test - void testAnalyzeWithAndCondition() { + void testAnalyzeWithAndConditionShouldThrow() { // Test AND condition which should be 
unsupported RexLiteral literal18 = rexBuilder.makeExactLiteral(BigDecimal.valueOf(18)); RexLiteral literal65 = rexBuilder.makeExactLiteral(BigDecimal.valueOf(65)); @@ -371,7 +352,7 @@ void testAnalyzeWithAndCondition() { } @Test - void testAnalyzeWithOrCondition() { + void testAnalyzeWithOrConditionShouldThrow() { // Test OR condition which should be unsupported RexLiteral literal18 = rexBuilder.makeExactLiteral(BigDecimal.valueOf(18)); RexLiteral literal65 = rexBuilder.makeExactLiteral(BigDecimal.valueOf(65)); @@ -465,30 +446,6 @@ void testAnalyzeWithReversedComparison() { assertEquals(normalizeJson(expectedJson), normalizeJson(builder.toString())); } - @Test - void testAnalyzeWithInvalidOperands() { - // Test condition with neither field reference nor literal - RexCall invalidCondition = - (RexCall) - rexBuilder.makeCall( - SqlStdOperatorTable.GREATER_THAN_OR_EQUAL, - fieldRef, - fieldRef); // both sides are field refs - - RexLiteral resultLiteral = rexBuilder.makeLiteral("result"); - RexLiteral elseLiteral = rexBuilder.makeLiteral("else"); - - RexCall caseCall = - (RexCall) - rexBuilder.makeCall( - SqlStdOperatorTable.CASE, - Arrays.asList(invalidCondition, resultLiteral, elseLiteral)); - - CaseRangeAnalyzer analyzer = CaseRangeAnalyzer.create("test", rowType); - - assertThrows(UnsupportedOperationException.class, () -> analyzer.analyze(caseCall)); - } - @Test void testAnalyzeWithNullLiteralValue() { // Test with null literal value that can't be converted to Double @@ -511,11 +468,6 @@ void testAnalyzeWithNullLiteralValue() { assertThrows(UnsupportedOperationException.class, () -> analyzer.analyze(caseCall)); } - @Test - void testDefaultElseKey() { - assertEquals("null", CaseRangeAnalyzer.DEFAULT_ELSE_KEY); - } - @Test void testSimpleCaseGeneratesExpectedDSL() { // CASE WHEN age >= 18 THEN 'adult' ELSE 'minor' END @@ -776,6 +728,77 @@ void testSearchConditionGeneratesExpectedDSL() { assertEquals(normalizeJson(expectedJson), normalizeJson(builder.toString())); } 
+ @Test + void testSearchWithDiscontinuousRanges() { + // age >= 20 && age < 30 -> '20-30' + // age >= 40 && age <50 -> '40-50' + // Create discontinuous ranges: [20, 30) and [40, 50) + TreeRangeSet rangeSet = TreeRangeSet.create(); + rangeSet.add(Range.closedOpen(BigDecimal.valueOf(20), BigDecimal.valueOf(30))); + rangeSet.add(Range.closedOpen(BigDecimal.valueOf(40), BigDecimal.valueOf(50))); + + Sarg sarg = Sarg.of(RexUnknownAs.UNKNOWN, rangeSet); + RexNode sargLiteral = + rexBuilder.makeSearchArgumentLiteral(sarg, typeFactory.createSqlType(SqlTypeName.DECIMAL)); + + RexCall searchCall = + (RexCall) + rexBuilder.makeCall(SqlStdOperatorTable.SEARCH, Arrays.asList(fieldRef, sargLiteral)); + + RexLiteral targetLiteral = rexBuilder.makeLiteral("target_age"); + RexLiteral otherLiteral = + rexBuilder.makeNullLiteral(typeFactory.createSqlType(SqlTypeName.VARCHAR)); + + RexCall caseCall = + (RexCall) + rexBuilder.makeCall( + SqlStdOperatorTable.CASE, Arrays.asList(searchCall, targetLiteral, otherLiteral)); + + CaseRangeAnalyzer analyzer = CaseRangeAnalyzer.create("discontinuous_ranges", rowType); + Optional result = analyzer.analyze(caseCall); + + assertTrue(result.isPresent()); + RangeAggregationBuilder builder = result.get(); + + String expectedJson = + """ + { + "discontinuous_ranges" : { + "range" : { + "field" : "age", + "ranges" : [ + { + "key" : "target_age", + "from" : 20.0, + "to" : 30.0 + }, + { + "key" : "target_age", + "from" : 40.0, + "to" : 50.0 + }, + { + "key" : "null", + "to" : 20.0 + }, + { + "key" : "null", + "from" : 30.0, + "to" : 40.0 + }, + { + "key" : "null", + "from" : 50.0 + } + ], + "keyed" : true + } + } + }"""; + + assertEquals(normalizeJson(expectedJson), normalizeJson(builder.toString())); + } + /** * Helper method to normalize JSON strings for comparison by removing extra whitespace and * ensuring consistent formatting. 
From 0493629f4a89ce261aeffbbe6798178015163386 Mon Sep 17 00:00:00 2001 From: Yuanchun Shen Date: Sun, 28 Sep 2025 21:56:25 +0800 Subject: [PATCH 10/26] Add a yaml test that replicates issue 4201 Signed-off-by: Yuanchun Shen --- .../agg_composite_date_range_push.yaml | 0 .../agg_composite_date_range_push.yaml | 0 .../rest-api-spec/test/issues/4201.yml | 110 ++++++++++++++++++ 3 files changed, 110 insertions(+) create mode 100644 integ-test/src/test/resources/expectedOutput/calcite/agg_composite_date_range_push.yaml create mode 100644 integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/agg_composite_date_range_push.yaml create mode 100644 integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/4201.yml diff --git a/integ-test/src/test/resources/expectedOutput/calcite/agg_composite_date_range_push.yaml b/integ-test/src/test/resources/expectedOutput/calcite/agg_composite_date_range_push.yaml new file mode 100644 index 00000000000..e69de29bb2d diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/agg_composite_date_range_push.yaml b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/agg_composite_date_range_push.yaml new file mode 100644 index 00000000000..e69de29bb2d diff --git a/integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/4201.yml b/integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/4201.yml new file mode 100644 index 00000000000..d90fa438a81 --- /dev/null +++ b/integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/4201.yml @@ -0,0 +1,110 @@ +setup: + - do: + query.settings: + body: + transient: + plugins.calcite.enabled : true + + - do: + indices.create: + index: test + body: + mappings: + properties: + "@timestamp": + type: date + timestamp: + type: date + size: + type: long + tmin: + type: double + metrics: + type: object + properties: + size: + type: long + tmin: + type: double + + - do: + bulk: + index: test + refresh: true + body: + - '{"index": {}}' + - '{ 
"@timestamp": "2025-01-01T00:00:00Z", "timestamp": "2025-01-01T00:00:00Z", "size": -20, "tmin": 1.0, "metrics": { "size": -20, "tmin": 1.0 } }' + - '{"index": {}}' + - '{ "@timestamp": "2025-01-01T01:00:00Z", "timestamp": "2025-01-01T01:00:00Z", "size": 5, "tmin": 2.5, "metrics": { "size": 5, "tmin": 2.5 } }' + - '{"index": {}}' + - '{ "@timestamp": "2025-01-01T02:00:00Z", "timestamp": "2025-01-01T02:00:00Z", "size": 50, "tmin": 3.2, "metrics": { "size": 50, "tmin": 3.2 } }' + - '{"index": {}}' + - '{ "@timestamp": "2025-01-01T03:00:00Z", "timestamp": "2025-01-01T03:00:00Z", "size": 500, "tmin": 1.8, "metrics": { "size": 500, "tmin": 1.8 } }' + - '{"index": {}}' + - '{ "@timestamp": "2025-01-01T04:00:00Z", "timestamp": "2025-01-01T04:00:00Z", "size": 1500, "tmin": 4.1, "metrics": { "size": 1500, "tmin": 4.1 } }' + - '{"index": {}}' + - '{ "@timestamp": "2025-01-01T05:00:00Z", "timestamp": "2025-01-01T05:30:00Z", "size": 3000, "tmin": 2.9, "metrics": { "size": 3000, "tmin": 2.9 } }' + +--- +teardown: + - do: + query.settings: + body: + transient: + plugins.calcite.enabled : false + +--- +"Test aggregation by range bucket": + - skip: + features: + - headers + - allowed_warnings + - do: + headers: + Content-Type: 'application/json' + ppl: + body: + query: | + source = test + | eval range_bucket = case( + `metrics.size` < -10, 'range_1', + `metrics.size` >= -10 and `metrics.size` < 10, 'range_2', + `metrics.size` >= 10 and `metrics.size` < 100, 'range_3', + `metrics.size` >= 100 and `metrics.size` < 1000, 'range_4', + `metrics.size` >= 1000 and `metrics.size` < 2000, 'range_5', + `metrics.size` >= 2000, 'range_6' + ) + | stats min(`metrics.tmin`) as tmin, avg(`metrics.size`) as tavg, max(`metrics.size`) as tmax + by range_bucket + + - match: { total: 6 } + - match: { schema: [{"name": "tmin", "type": "double"}, {"name": "tavg", "type": "double"}, {"name": "tmax", "type": "bigint"}, {"name": "range_bucket", "type": "string"}] } + - match: { datarows: [[1.0, -20.0, -20, 
"range_1"], [2.5, 5.0, 5, "range_2"], [3.2, 50.0, 50, "range_3"], [1.8, 500.0, 500, "range_4"], [4.1, 1500.0, 1500, "range_5"], [2.9, 3000.0, 3000, "range_6"]] } + +--- +"Test aggregation by range bucket and time span": + - skip: + features: + - headers + - allowed_warnings + - do: + headers: + Content-Type: 'application/json' + ppl: + body: + query: | + source = test + | eval range_bucket = case( + `metrics.size` < -10, 'range_1', + `metrics.size` >= -10 and `metrics.size` < 10, 'range_2', + `metrics.size` >= 10 and `metrics.size` < 100, 'range_3', + `metrics.size` >= 100 and `metrics.size` < 1000, 'range_4', + `metrics.size` >= 1000 and `metrics.size` < 2000, 'range_5', + `metrics.size` >= 2000, 'range_6' + ) + | stats min(`metrics.tmin`) as tmin, avg(`metrics.size`) as tavg, max(`metrics.size`) as tmax + by range_bucket, span(`@timestamp`, 1h) + + - match: { total: 6 } + - match: { schema: [{"name": "tmin", "type": "double"}, {"name": "tavg", "type": "double"}, {"name": "tmax", "type": "bigint"}, {"name": "span(`@timestamp`,1h)", "type": "timestamp"}, {"name": "range_bucket", "type": "string"}] } + - match: { datarows: [[1.0, -20.0, -20, "2025-01-01 00:00:00", "range_1"], [2.5, 5.0, 5, "2025-01-01 01:00:00", "range_2"], [3.2, 50.0, 50, "2025-01-01 02:00:00", "range_3"], [1.8, 500.0, 500, "2025-01-01 03:00:00", "range_4"], [4.1, 1500.0, 1500, "2025-01-01 04:00:00", "range_5"], [2.9, 3000.0, 3000, "2025-01-01 05:00:00", "range_6"]] } From f690addbcc4eaf59228a8fa7effd169264163c84 Mon Sep 17 00:00:00 2001 From: Yuanchun Shen Date: Sun, 28 Sep 2025 21:59:29 +0800 Subject: [PATCH 11/26] Add integration tests for case in aggregation Signed-off-by: Yuanchun Shen --- .../sql/calcite/remote/CalciteExplainIT.java | 16 +- .../remote/CalcitePPLCaseFunctionIT.java | 253 +++++++++++------- .../agg_composite_date_range_push.yaml | 11 + .../agg_composite_date_range_push.yaml | 15 ++ .../agg/LeafBucketAggregationParser.java | 6 +- 5 files changed, 198 insertions(+), 103 
deletions(-) diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java index 0ee2de71fe4..f990c327ea2 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java @@ -944,7 +944,7 @@ public void testCasePushdownAsRangeQueryExplain() throws IOException { TEST_INDEX_BANK))); // CASE 2: Composite - Range - Metric - // 2.1 Composite(1 field) - Range - Metric + // 2.1 Composite (term) - Range - Metric assertYamlEqualsJsonIgnoreId( loadExpectedPlan("agg_composite_range_metric_push.yaml"), explainQueryToString( @@ -953,7 +953,15 @@ public void testCasePushdownAsRangeQueryExplain() throws IOException { + " by state, age_range", TEST_INDEX_BANK))); - // 2.2 Composite(2 fields) - Range - Metric (with count) + // 2.2 Composite (date histogram) - Range - Metric + assertYamlEqualsJsonIgnoreId( + loadExpectedPlan("agg_composite_date_range_push.yaml"), + explainQueryToString( + "source=opensearch-sql_test_index_time_data | eval value_range = case(value < 7000," + + " 'small' else 'large') | stats avg(value) by value_range, span(@timestamp," + + " 1h)")); + + // 2.3 Composite(2 fields) - Range - Metric (with count) assertYamlEqualsJsonIgnoreId( loadExpectedPlan("agg_composite2_range_count_push.yaml"), explainQueryToString( @@ -962,7 +970,7 @@ public void testCasePushdownAsRangeQueryExplain() throws IOException { + " avg(balance), count() by age_range, state, gender", TEST_INDEX_BANK))); - // 2.3 Composite (2 fields) - Range - Range - Metric (with count) + // 2.4 Composite (2 fields) - Range - Range - Metric (with count) assertYamlEqualsJsonIgnoreId( loadExpectedPlan("agg_composite2_range_range_count_push.yaml"), explainQueryToString( @@ -972,7 +980,7 @@ public void testCasePushdownAsRangeQueryExplain() throws IOException { + " avg_balance by 
age_range, balance_range, state", TEST_INDEX_BANK))); - // 2.4 Should not be pushed because case result expression is not constant + // 2.5 Should not be pushed because case result expression is not constant assertYamlEqualsJsonIgnoreId( loadExpectedPlan("agg_case_composite_cannot_push.yaml"), explainQueryToString( diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLCaseFunctionIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLCaseFunctionIT.java index b3439457016..2502c28ce29 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLCaseFunctionIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLCaseFunctionIT.java @@ -5,8 +5,8 @@ package org.opensearch.sql.calcite.remote; -import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertTrue; +import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_BANK; import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_WEBLOGS; import static org.opensearch.sql.util.MatcherUtils.rows; import static org.opensearch.sql.util.MatcherUtils.schema; @@ -27,6 +27,9 @@ public void init() throws Exception { enableCalcite(); loadIndex(Index.WEBLOG); + loadIndex(Index.TIME_TEST_DATA); + loadIndex(Index.TIME_TEST_DATA_WITH_NULL); + loadIndex(Index.BANK); appendDataForBadResponse(); } @@ -250,122 +253,180 @@ public void testCaseWhenInSubquery() throws IOException { } @Test - public void testCaseRangeAggregationPushdown() throws IOException { - // Test CASE expression that can be optimized to range aggregation - // Note: This has an implicit ELSE NULL, so it won't be optimized - // But it should still work correctly - JSONObject actual = + public void testCaseCanBePushedDownAsRangeQuery() throws IOException { + // CASE 1: Range - Metric + // 1.1 Range - Metric + JSONObject actual1 = executeQuery( String.format( - "source=%s | eval range_bucket = case(" - + " 
cast(bytes as int) < 1000, 'small'," - + " cast(bytes as int) >= 1000 AND cast(bytes as int) < 5000, 'medium'," - + " cast(bytes as int) >= 5000, 'large'" - + ") | stats count() as total by range_bucket | sort range_bucket", - TEST_INDEX_WEBLOGS)); + "source=%s | eval age_range = case(age < 30, 'u30', age < 40, 'u40' else 'u100') |" + + " stats avg(age) as avg_age by age_range", + TEST_INDEX_BANK)); + verifySchema(actual1, schema("avg_age", "double"), schema("age_range", "string")); + verifyDataRows(actual1, rows(28.0, "u30"), rows(35.0, "u40")); - verifySchema(actual, schema("range_bucket", "string"), schema("total", "bigint")); - - // This should work but won't be optimized due to implicit NULL bucket - assertTrue(actual.getJSONArray("datarows").length() > 0); - } - - @Test - public void testCaseRangeAggregationWithMetrics() throws IOException { - // Test CASE-to-range with additional aggregations - JSONObject actual = + // 1.2 Range - Metric (COUNT) + JSONObject actual2 = executeQuery( String.format( - "source=%s | eval size_category = case( cast(bytes as int) < 2000, 'small', " - + " cast(bytes as int) >= 2000 AND cast(bytes as int) < 5000, 'medium', " - + " cast(bytes as int) >= 5000, 'large') | stats count() as total," - + " avg(cast(bytes as int)) as avg_bytes by size_category | sort size_category", - TEST_INDEX_WEBLOGS)); + "source=%s | eval age_range = case(age < 30, 'u30', age >= 30 and age < 40, 'u40'" + + " else 'u100') | stats avg(age) by age_range", + TEST_INDEX_BANK)); + verifySchema(actual2, schema("avg(age)", "double"), schema("age_range", "string")); + verifyDataRows(actual2, rows(28.0, "u30"), rows(35.0, "u40")); + // 1.3 Range - Range - Metric + JSONObject actual3 = + executeQuery( + String.format( + "source=%s | eval age_range = case(age < 30, 'u30', age < 40, 'u40' else 'u100')," + + " balance_range = case(balance < 20000, 'medium' else 'high') | stats" + + " avg(balance) as avg_balance by age_range, balance_range", + TEST_INDEX_BANK)); 
verifySchema( - actual, - schema("size_category", "string"), - schema("total", "bigint"), - schema("avg_bytes", "double")); - - // Verify we get results for each category - // The exact values may vary based on test data, but structure should be correct - assertEquals(3, actual.getJSONArray("datarows").length()); - } + actual3, + schema("avg_balance", "double"), + schema("age_range", "string"), + schema("balance_range", "string")); + verifyDataRows( + actual3, + rows(32838.0, "u30", "high"), + rows(8761.333333333334, "u40", "medium"), + rows(42617.0, "u40", "high")); - @Test - public void testCaseRangeAggregationWithElse() throws IOException { - // Test CASE with explicit ELSE clause - JSONObject actual = + // 1.4 Range - Metric (With null & discontinuous ranges) + JSONObject actual4 = executeQuery( String.format( - "source=%s | eval status_category = case( cast(response as int) < 300, 'success', " - + " cast(response as int) >= 300 AND cast(response as int) < 400, 'redirect', " - + " cast(response as int) >= 400 AND cast(response as int) < 500," - + " 'client_error', cast(response as int) >= 500, 'server_error' else" - + " 'unknown') | stats count() by status_category | sort status_category", - TEST_INDEX_WEBLOGS)); - - verifySchema(actual, schema("status_category", "string"), schema("count()", "bigint")); - - // Should handle the ELSE case for null/non-numeric responses - assertTrue(actual.getJSONArray("datarows").length() > 0); - } + "source=%s | eval age_range = case(age < 30, 'u30', (age >= 35 and age < 40) or age" + + " >= 80, '30-40 or >=80') | stats avg(balance) by age_range", + TEST_INDEX_BANK)); + verifySchema(actual4, schema("avg(balance)", "double"), schema("age_range", "string")); + verifyDataRows( + actual4, + rows(32838.0, "u30"), + rows(30497.0, "null"), + rows(20881.333333333332, "30-40 or >=80")); - @Test - public void testNonOptimizableCaseExpression() throws IOException { - // Test CASE that cannot be optimized (different fields) - JSONObject 
actual = + // 1.5 Should not be pushed because the range is not closed-open + JSONObject actual5 = executeQuery( String.format( - "source=%s | eval mixed_condition = case(" - + " cast(bytes as int) < 1000, 'small_bytes'," - + " cast(response as int) >= 400, 'error_response'" - + " else 'other'" - + ") | stats count() by mixed_condition", - TEST_INDEX_WEBLOGS)); - - verifySchema(actual, schema("mixed_condition", "string"), schema("count()", "bigint")); + "source=%s | eval age_range = case(age < 30, 'u30', age >= 30 and age <= 40, 'u40'" + + " else 'u100') | stats avg(age) as avg_age by age_range", + TEST_INDEX_BANK)); + verifySchema(actual5, schema("avg_age", "double"), schema("age_range", "string")); + verifyDataRows(actual5, rows(35.0, "u40"), rows(28.0, "u30")); - // This should work but won't be optimized - assertTrue(actual.getJSONArray("datarows").length() > 0); - } - - @Test - public void testCaseWithNonLiteralResult() throws IOException { - // Test CASE that cannot be optimized (non-literal results) - JSONObject actual = + // CASE 2: Composite - Range - Metric + // 2.1 Composite (term) - Range - Metric + JSONObject actual6 = executeQuery( String.format( - "source=%s | eval computed_result = case(" - + " cast(bytes as int) < 1000, concat('small_', host)," - + " cast(bytes as int) >= 1000, concat('large_', host)" - + ") | stats count() by computed_result | head 3", - TEST_INDEX_WEBLOGS)); - - verifySchema(actual, schema("computed_result", "string"), schema("count()", "bigint")); + "source=%s | eval age_range = case(age < 30, 'u30' else 'a30') | stats avg(balance)" + + " by state, age_range", + TEST_INDEX_BANK)); + verifySchema( + actual6, + schema("avg(balance)", "double"), + schema("state", "string"), + schema("age_range", "string")); + verifyDataRows( + actual6, + rows(39225.0, "IL", "a30"), + rows(48086.0, "IN", "a30"), + rows(4180.0, "MD", "a30"), + rows(40540.0, "PA", "a30"), + rows(5686.0, "TN", "a30"), + rows(32838.0, "VA", "u30"), + rows(16418.0, "WA", 
"a30")); - // This should work but won't be optimized to range aggregation - assertTrue(actual.getJSONArray("datarows").length() > 0); - } + // 2.2 Composite (date histogram) - Range - Metric + JSONObject actual7 = + executeQuery( + "source=opensearch-sql_test_index_time_data | eval value_range = case(value < 7000," + + " 'small' else 'large') | stats avg(value) by value_range, span(@timestamp," + + " 1h)"); + verifySchema( + actual7, + schema("avg(value)", "double"), + schema("span(@timestamp,1h)", "timestamp"), + schema("value_range", "string")); + // Verify we have results with both small and large ranges and timestamps + assertTrue(actual7.getJSONArray("datarows").length() == 100); + // Verify some sample rows to check data correctness + String resultStr = actual7.toString(); + assertTrue(resultStr.contains("small") && resultStr.contains("large")); + assertTrue(resultStr.contains("2025-07-28") && resultStr.contains("2025-07-29")); - @Test - public void testOptimizableCaseRangeAggregation() throws IOException { - // Test CASE that could be optimized if all ranges are covered with explicit ELSE - JSONObject actual = + // 2.3 Composite(2 fields) - Range - Metric (with count) + JSONObject actual8 = executeQuery( String.format( - "source=%s | eval size_bucket = case(" - + " cast(bytes as int) < 2000, 'small'," - + " cast(bytes as int) >= 2000 AND cast(bytes as int) < 5000, 'medium'," - + " cast(bytes as int) >= 5000, 'large'" - + " else 'unknown'" - + ") | stats count() by size_bucket | sort size_bucket", - TEST_INDEX_WEBLOGS)); + "source=%s | eval age_range = case(age < 30, 'u30' else 'a30') | stats" + + " avg(balance), count() by age_range, state, gender", + TEST_INDEX_BANK)); + verifySchema( + actual8, + schema("avg(balance)", "double"), + schema("count()", "bigint"), + schema("age_range", "string"), + schema("state", "string"), + schema("gender", "string")); + verifyDataRows( + actual8, + rows(5686.0, 1, "a30", "TN", "M"), + rows(16418.0, 1, "a30", "WA", "M"), + 
rows(40540.0, 1, "a30", "PA", "F"), + rows(4180.0, 1, "a30", "MD", "M"), + rows(32838.0, 1, "u30", "VA", "F"), + rows(39225.0, 1, "a30", "IL", "M"), + rows(48086.0, 1, "a30", "IN", "F")); - verifySchema(actual, schema("size_bucket", "string"), schema("count()", "bigint")); + // 2.4 Composite (2 fields) - Range - Range - Metric (with count) + JSONObject actual9 = + executeQuery( + String.format( + "source=%s | eval age_range = case(age < 35, 'u35' else 'a35'), balance_range =" + + " case(balance < 20000, 'medium' else 'high') | stats avg(balance) as" + + " avg_balance by age_range, balance_range, state", + TEST_INDEX_BANK)); + verifySchema( + actual9, + schema("avg_balance", "double"), + schema("age_range", "string"), + schema("balance_range", "string"), + schema("state", "string")); + verifyDataRows( + actual9, + rows(39225.0, "u35", "high", "IL"), + rows(48086.0, "u35", "high", "IN"), + rows(4180.0, "u35", "medium", "MD"), + rows(40540.0, "a35", "high", "PA"), + rows(5686.0, "a35", "medium", "TN"), + rows(32838.0, "u35", "high", "VA"), + rows(16418.0, "a35", "medium", "WA")); - // This should work - the explicit ELSE makes it potentially optimizable - assertTrue(actual.getJSONArray("datarows").length() > 0); + // 2.5 Should not be pushed because case result expression is not constant + JSONObject actual10 = + executeQuery( + String.format( + "source=%s | eval age_range = case(age < 35, 'u35' else email) | stats avg(balance)" + + " as avg_balance by age_range, state", + TEST_INDEX_BANK)); + verifySchema( + actual10, + schema("avg_balance", "double"), + schema("age_range", "string"), + schema("state", "string")); + verifyDataRows( + actual10, + rows(32838.0, "u35", "VA"), + rows(4180.0, "u35", "MD"), + rows(48086.0, "u35", "IN"), + rows(40540.0, "virginiaayala@filodyne.com", "PA"), + rows(39225.0, "u35", "IL"), + rows(5686.0, "hattiebond@netagy.com", "TN"), + rows(16418.0, "elinorratliff@scentric.com", "WA")); } } diff --git 
a/integ-test/src/test/resources/expectedOutput/calcite/agg_composite_date_range_push.yaml b/integ-test/src/test/resources/expectedOutput/calcite/agg_composite_date_range_push.yaml index e69de29bb2d..3f0b5243356 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/agg_composite_date_range_push.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/agg_composite_date_range_push.yaml @@ -0,0 +1,11 @@ +calcite: + logical: | + LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(avg(value)=[$2], span(@timestamp,1h)=[$1], value_range=[$0]) + LogicalAggregate(group=[{0, 2}], avg(value)=[AVG($1)]) + LogicalProject(value_range=[$10], value=[$2], span(@timestamp,1h)=[SPAN($0, 1, 'h')]) + LogicalFilter(condition=[IS NOT NULL($0)]) + LogicalProject(@timestamp=[$0], category=[$1], value=[$2], timestamp=[$3], _id=[$4], _index=[$5], _score=[$6], _maxscore=[$7], _sort=[$8], _routing=[$9], value_range=[CASE(<($2, 7000), 'small':VARCHAR, 'large':VARCHAR)]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_time_data]]) + physical: | + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_time_data]], PushDownContext=[[FILTER->IS NOT NULL($0), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 2},avg(value)=AVG($1)), PROJECT->[avg(value), span(@timestamp,1h), value_range], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"exists":{"field":"@timestamp","boost":1.0}},"sort":[],"aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"span(@timestamp,1h)":{"date_histogram":{"field":"@timestamp","missing_bucket":false,"order":"asc","fixed_interval":"1h"}}}]},"aggregations":{"value_range":{"range":{"field":"value","ranges":[{"key":"small","to":7000.0},{"key":"large","from":7000.0}],"keyed":true},"aggregations":{"avg(value)":{"avg":{"field":"value"}}}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff 
--git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/agg_composite_date_range_push.yaml b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/agg_composite_date_range_push.yaml index e69de29bb2d..f99713d9aaa 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/agg_composite_date_range_push.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/agg_composite_date_range_push.yaml @@ -0,0 +1,15 @@ +calcite: + logical: | + LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(avg(value)=[$2], span(@timestamp,1h)=[$1], value_range=[$0]) + LogicalAggregate(group=[{0, 2}], avg(value)=[AVG($1)]) + LogicalProject(value_range=[$10], value=[$2], span(@timestamp,1h)=[SPAN($0, 1, 'h')]) + LogicalFilter(condition=[IS NOT NULL($0)]) + LogicalProject(@timestamp=[$0], category=[$1], value=[$2], timestamp=[$3], _id=[$4], _index=[$5], _score=[$6], _maxscore=[$7], _sort=[$8], _routing=[$9], value_range=[CASE(<($2, 7000), 'small':VARCHAR, 'large':VARCHAR)]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_time_data]]) + physical: | + EnumerableLimit(fetch=[10000]) + EnumerableCalc(expr#0..3=[{inputs}], expr#4=[0], expr#5=[=($t3, $t4)], expr#6=[null:BIGINT], expr#7=[CASE($t5, $t6, $t2)], expr#8=[CAST($t7):DOUBLE], expr#9=[/($t8, $t3)], avg(value)=[$t9], span(@timestamp,1h)=[$t1], value_range=[$t0]) + EnumerableAggregate(group=[{0, 2}], agg#0=[$SUM0($1)], agg#1=[COUNT($1)]) + EnumerableCalc(expr#0..9=[{inputs}], expr#10=[7000], expr#11=[<($t2, $t10)], expr#12=['small':VARCHAR], expr#13=['large':VARCHAR], expr#14=[CASE($t11, $t12, $t13)], expr#15=[1], expr#16=['h'], expr#17=[SPAN($t0, $t15, $t16)], expr#18=[IS NOT NULL($t0)], value_range=[$t14], value=[$t2], span(@timestamp,1h)=[$t17], $condition=[$t18]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_time_data]]) diff --git 
a/opensearch/src/main/java/org/opensearch/sql/opensearch/response/agg/LeafBucketAggregationParser.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/response/agg/LeafBucketAggregationParser.java index ca6d6c0eb49..7cd414f39eb 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/response/agg/LeafBucketAggregationParser.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/response/agg/LeafBucketAggregationParser.java @@ -19,8 +19,8 @@ import org.opensearch.search.aggregations.bucket.range.Range; /** - * Use BucketAggregationParser only when there is a single group-by key, it returns multiple - * buckets. {@link CompositeAggregationParser} is used for multiple group by keys + * Use LeafBucketAggregationParser only when there is a single group-by key, it returns multiple + * buckets. {@link BucketAggregationParser} is used for multiple group by keys */ @EqualsAndHashCode public class LeafBucketAggregationParser implements OpenSearchAggregationResponseParser { @@ -36,7 +36,7 @@ public LeafBucketAggregationParser(List metricParserList) { metricsParser = new MetricParserHelper(metricParserList); } - /** CompositeAggregationParser with count aggregation name list, used in v3 */ + /** BucketAggregationParser with count aggregation name list, used in v3 */ public LeafBucketAggregationParser( List metricParserList, List countAggNameList) { metricsParser = new MetricParserHelper(metricParserList); From b204b2ee0ea1a4338bea76980422f4a974ff76b1 Mon Sep 17 00:00:00 2001 From: Yuanchun Shen Date: Sun, 28 Sep 2025 22:49:20 +0800 Subject: [PATCH 12/26] Fix unit tests Signed-off-by: Yuanchun Shen --- .../sql/calcite/remote/CalcitePPLCaseFunctionIT.java | 7 ++++--- .../response/agg/LeafBucketAggregationParser.java | 3 ++- .../opensearch/request/AggregateAnalyzerTest.java | 12 +++++++++--- 3 files changed, 15 insertions(+), 7 deletions(-) diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLCaseFunctionIT.java 
b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLCaseFunctionIT.java index 2502c28ce29..b0081bbc7a1 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLCaseFunctionIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLCaseFunctionIT.java @@ -8,6 +8,7 @@ import static org.junit.jupiter.api.Assertions.assertTrue; import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_BANK; import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_WEBLOGS; +import static org.opensearch.sql.util.MatcherUtils.closeTo; import static org.opensearch.sql.util.MatcherUtils.rows; import static org.opensearch.sql.util.MatcherUtils.schema; import static org.opensearch.sql.util.MatcherUtils.verifyDataRows; @@ -304,9 +305,9 @@ public void testCaseCanBePushedDownAsRangeQuery() throws IOException { verifySchema(actual4, schema("avg(balance)", "double"), schema("age_range", "string")); verifyDataRows( actual4, - rows(32838.0, "u30"), - rows(30497.0, "null"), - rows(20881.333333333332, "30-40 or >=80")); + closeTo(32838.0, "u30"), + closeTo(30497.0, "null"), + closeTo(20881.333333333332, "30-40 or >=80")); // 1.5 Should not be pushed because the range is not closed-open JSONObject actual5 = diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/response/agg/LeafBucketAggregationParser.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/response/agg/LeafBucketAggregationParser.java index 7cd414f39eb..fb474461305 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/response/agg/LeafBucketAggregationParser.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/response/agg/LeafBucketAggregationParser.java @@ -64,8 +64,9 @@ private Map parse(MultiBucketsAggregation.Bucket bucket, String if (bucket.getDocCount() == 0) { return null; } - result.put(name, bucket.getKey()); } + // TODO: Should we add all bucket key to agg name? It does not always seem necessary. 
+ result.put(name, bucket.getKey()); countAggNameList.forEach(n -> result.put(n, bucket.getDocCount())); return result; } diff --git a/opensearch/src/test/java/org/opensearch/sql/opensearch/request/AggregateAnalyzerTest.java b/opensearch/src/test/java/org/opensearch/sql/opensearch/request/AggregateAnalyzerTest.java index 602ee4b9d20..86fb7992d46 100644 --- a/opensearch/src/test/java/org/opensearch/sql/opensearch/request/AggregateAnalyzerTest.java +++ b/opensearch/src/test/java/org/opensearch/sql/opensearch/request/AggregateAnalyzerTest.java @@ -46,8 +46,9 @@ import org.opensearch.sql.opensearch.data.type.OpenSearchDataType; import org.opensearch.sql.opensearch.data.type.OpenSearchDataType.MappingType; import org.opensearch.sql.opensearch.request.AggregateAnalyzer.ExpressionNotAnalyzableException; -import org.opensearch.sql.opensearch.response.agg.CompositeAggregationParser; +import org.opensearch.sql.opensearch.response.agg.BucketAggregationParser; import org.opensearch.sql.opensearch.response.agg.FilterParser; +import org.opensearch.sql.opensearch.response.agg.LeafBucketAggregationParser; import org.opensearch.sql.opensearch.response.agg.MetricParserHelper; import org.opensearch.sql.opensearch.response.agg.NoBucketAggregationParser; import org.opensearch.sql.opensearch.response.agg.OpenSearchAggregationResponseParser; @@ -280,9 +281,14 @@ void analyze_groupBy() throws ExpressionNotAnalyzableException { + "{\"a\":{\"terms\":{\"field\":\"a\",\"missing_bucket\":true,\"missing_order\":\"first\",\"order\":\"asc\"}}}," + "{\"b\":{\"terms\":{\"field\":\"b.keyword\",\"missing_bucket\":true,\"missing_order\":\"first\",\"order\":\"asc\"}}}]}}}]", result.getLeft().toString()); - assertInstanceOf(CompositeAggregationParser.class, result.getRight()); + assertInstanceOf(BucketAggregationParser.class, result.getRight()); + assertInstanceOf( + LeafBucketAggregationParser.class, + ((BucketAggregationParser) result.getRight()).getSubAggParser()); MetricParserHelper metricsParser = 
- ((CompositeAggregationParser) result.getRight()).getMetricsParser(); + ((LeafBucketAggregationParser) + ((BucketAggregationParser) result.getRight()).getSubAggParser()) + .getMetricsParser(); assertEquals(1, metricsParser.getMetricParserMap().size()); metricsParser .getMetricParserMap() From 4dc86db296f579e99b72dc575ab49e16095b649f Mon Sep 17 00:00:00 2001 From: Yuanchun Shen Date: Sun, 28 Sep 2025 23:20:40 +0800 Subject: [PATCH 13/26] Add a patch to CalcitePPLCaseFunctionIT Signed-off-by: Yuanchun Shen --- .../remote/CalcitePPLCaseFunctionIT.java | 42 ++++++++++++------- 1 file changed, 27 insertions(+), 15 deletions(-) diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLCaseFunctionIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLCaseFunctionIT.java index b0081bbc7a1..e545fccdaf7 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLCaseFunctionIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLCaseFunctionIT.java @@ -5,7 +5,6 @@ package org.opensearch.sql.calcite.remote; -import static org.junit.jupiter.api.Assertions.assertTrue; import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_BANK; import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_WEBLOGS; import static org.opensearch.sql.util.MatcherUtils.closeTo; @@ -292,7 +291,7 @@ public void testCaseCanBePushedDownAsRangeQuery() throws IOException { verifyDataRows( actual3, rows(32838.0, "u30", "high"), - rows(8761.333333333334, "u40", "medium"), + closeTo(8761.333333333334, "u40", "medium"), rows(42617.0, "u40", "high")); // 1.4 Range - Metric (With null & discontinuous ranges) @@ -303,11 +302,20 @@ public void testCaseCanBePushedDownAsRangeQuery() throws IOException { + " >= 80, '30-40 or >=80') | stats avg(balance) by age_range", TEST_INDEX_BANK)); verifySchema(actual4, schema("avg(balance)", "double"), schema("age_range", "string")); - verifyDataRows( - actual4, - 
closeTo(32838.0, "u30"), - closeTo(30497.0, "null"), - closeTo(20881.333333333332, "30-40 or >=80")); + // There's such a discrepancy because null cannot be the key for a range query + if (isPushdownDisabled()) { + verifyDataRows( + actual4, + rows(32838.0, "u30"), + rows(30497.0, null), + closeTo(20881.333333333332, "30-40 or >=80")); + } else { + verifyDataRows( + actual4, + rows(32838.0, "u30"), + rows(30497.0, "null"), + closeTo(20881.333333333332, "30-40 or >=80")); + } // 1.5 Should not be pushed because the range is not closed-open JSONObject actual5 = @@ -318,7 +326,10 @@ public void testCaseCanBePushedDownAsRangeQuery() throws IOException { TEST_INDEX_BANK)); verifySchema(actual5, schema("avg_age", "double"), schema("age_range", "string")); verifyDataRows(actual5, rows(35.0, "u40"), rows(28.0, "u30")); + } + @Test + public void testCaseCanBePushedDownAsCompositeRangeQuery() throws IOException { // CASE 2: Composite - Range - Metric // 2.1 Composite (term) - Range - Metric JSONObject actual6 = @@ -347,18 +358,19 @@ public void testCaseCanBePushedDownAsRangeQuery() throws IOException { executeQuery( "source=opensearch-sql_test_index_time_data | eval value_range = case(value < 7000," + " 'small' else 'large') | stats avg(value) by value_range, span(@timestamp," - + " 1h)"); + + " 1month)"); verifySchema( actual7, schema("avg(value)", "double"), - schema("span(@timestamp,1h)", "timestamp"), + schema("span(@timestamp,1month)", "timestamp"), schema("value_range", "string")); - // Verify we have results with both small and large ranges and timestamps - assertTrue(actual7.getJSONArray("datarows").length() == 100); - // Verify some sample rows to check data correctness - String resultStr = actual7.toString(); - assertTrue(resultStr.contains("small") && resultStr.contains("large")); - assertTrue(resultStr.contains("2025-07-28") && resultStr.contains("2025-07-29")); + + verifyDataRows( + actual7, + closeTo(6642.521739130435, "2025-07-01 00:00:00", "small"), + 
closeTo(8381.917808219177, "2025-07-01 00:00:00", "large"), + rows(6489.0, "2025-08-01 00:00:00", "small"), + rows(8375.0, "2025-08-01 00:00:00", "large")); // 2.3 Composite(2 fields) - Range - Metric (with count) JSONObject actual8 = From d38a916ae42546b7365e47df5fbe3f1265ce2ccb Mon Sep 17 00:00:00 2001 From: Yuanchun Shen Date: Mon, 29 Sep 2025 10:24:40 +0800 Subject: [PATCH 14/26] Migrate all composite aggregation parser usage to bucket aggregate parser Signed-off-by: Yuanchun Shen --- .../sql/opensearch/request/CaseRangeAnalyzer.java | 2 +- .../opensearch/response/agg/BucketAggregationParser.java | 5 ++++- .../storage/script/aggregation/AggregationQueryBuilder.java | 5 +++-- .../storage/scan/OpenSearchIndexScanOptimizationTest.java | 6 ++++-- 4 files changed, 12 insertions(+), 6 deletions(-) diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/request/CaseRangeAnalyzer.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/request/CaseRangeAnalyzer.java index 926936e0143..a33fca19e22 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/request/CaseRangeAnalyzer.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/request/CaseRangeAnalyzer.java @@ -171,7 +171,7 @@ private void analyzeSimpleComparison(RexCall call, String key) { addTo(key, value); } default -> throw new UnsupportedOperationException( - "ranges must equivalents of field >= constant or field < constant"); + "ranges must be equivalents of field >= constant or field < constant"); } ; } diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/response/agg/BucketAggregationParser.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/response/agg/BucketAggregationParser.java index 6f98e6620a0..ca40ccccf9c 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/response/agg/BucketAggregationParser.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/response/agg/BucketAggregationParser.java @@ -8,6 +8,7 @@ 
import java.util.HashMap; import java.util.List; import java.util.Map; +import lombok.EqualsAndHashCode; import lombok.Getter; import org.opensearch.search.SearchHits; import org.opensearch.search.aggregations.Aggregation; @@ -16,8 +17,10 @@ import org.opensearch.search.aggregations.bucket.composite.CompositeAggregation; import org.opensearch.search.aggregations.bucket.range.Range; +@Getter +@EqualsAndHashCode public class BucketAggregationParser implements OpenSearchAggregationResponseParser { - @Getter private final OpenSearchAggregationResponseParser subAggParser; + private final OpenSearchAggregationResponseParser subAggParser; public BucketAggregationParser(OpenSearchAggregationResponseParser subAggParser) { this.subAggParser = subAggParser; diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/aggregation/AggregationQueryBuilder.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/aggregation/AggregationQueryBuilder.java index 408511fde3f..34b5bdd99e7 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/aggregation/AggregationQueryBuilder.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/aggregation/AggregationQueryBuilder.java @@ -29,7 +29,8 @@ import org.opensearch.sql.expression.ReferenceExpression; import org.opensearch.sql.expression.aggregation.NamedAggregator; import org.opensearch.sql.opensearch.data.type.OpenSearchDataType; -import org.opensearch.sql.opensearch.response.agg.CompositeAggregationParser; +import org.opensearch.sql.opensearch.response.agg.BucketAggregationParser; +import org.opensearch.sql.opensearch.response.agg.LeafBucketAggregationParser; import org.opensearch.sql.opensearch.response.agg.MetricParser; import org.opensearch.sql.opensearch.response.agg.NoBucketAggregationParser; import org.opensearch.sql.opensearch.response.agg.OpenSearchAggregationResponseParser; @@ -95,7 +96,7 @@ public 
AggregationQueryBuilder(ExpressionSerializer serializer) { bucketNullable)) .subAggregations(metrics.getLeft()) .size(AGGREGATION_BUCKET_SIZE)), - new CompositeAggregationParser(metrics.getRight())); + new BucketAggregationParser(new LeafBucketAggregationParser(metrics.getRight()))); } } diff --git a/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/scan/OpenSearchIndexScanOptimizationTest.java b/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/scan/OpenSearchIndexScanOptimizationTest.java index 06cc0b82fd7..2618c5325b7 100644 --- a/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/scan/OpenSearchIndexScanOptimizationTest.java +++ b/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/scan/OpenSearchIndexScanOptimizationTest.java @@ -74,7 +74,8 @@ import org.opensearch.sql.expression.function.OpenSearchFunctions; import org.opensearch.sql.opensearch.data.type.OpenSearchDataType; import org.opensearch.sql.opensearch.request.OpenSearchRequestBuilder; -import org.opensearch.sql.opensearch.response.agg.CompositeAggregationParser; +import org.opensearch.sql.opensearch.response.agg.BucketAggregationParser; +import org.opensearch.sql.opensearch.response.agg.LeafBucketAggregationParser; import org.opensearch.sql.opensearch.response.agg.OpenSearchAggregationResponseParser; import org.opensearch.sql.opensearch.response.agg.SingleValueParser; import org.opensearch.sql.opensearch.storage.script.aggregation.AggregationQueryBuilder; @@ -803,7 +804,8 @@ private Runnable withAggregationPushedDown( .size(AggregationQueryBuilder.AGGREGATION_BUCKET_SIZE); List aggBuilders = Collections.singletonList(aggBuilder); responseParser = - new CompositeAggregationParser(new SingleValueParser(aggregation.aggregateName)); + new BucketAggregationParser( + new LeafBucketAggregationParser(new SingleValueParser(aggregation.aggregateName))); return () -> { verify(requestBuilder, times(1)).pushDownAggregation(Pair.of(aggBuilders, responseParser)); 
From 6beed21945ca4f3da5e28bbc20c3d8c2b3114cc3 Mon Sep 17 00:00:00 2001 From: Yuanchun Shen Date: Mon, 29 Sep 2025 11:17:27 +0800 Subject: [PATCH 15/26] Create a parent abstract classes for BucketAggregationParsers Signed-off-by: Yuanchun Shen --- .../agg/AbstractBucketAggregationParser.java | 63 +++++++++++++++++++ .../response/agg/BucketAggregationParser.java | 46 +++++++++----- .../agg/LeafBucketAggregationParser.java | 19 +++--- 3 files changed, 101 insertions(+), 27 deletions(-) create mode 100644 opensearch/src/main/java/org/opensearch/sql/opensearch/response/agg/AbstractBucketAggregationParser.java diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/response/agg/AbstractBucketAggregationParser.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/response/agg/AbstractBucketAggregationParser.java new file mode 100644 index 00000000000..1098ccd4666 --- /dev/null +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/response/agg/AbstractBucketAggregationParser.java @@ -0,0 +1,63 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.opensearch.response.agg; + +import java.util.List; +import java.util.Map; +import org.opensearch.search.SearchHits; +import org.opensearch.search.aggregations.bucket.composite.CompositeAggregation; +import org.opensearch.search.aggregations.bucket.range.Range; + +/** + * Abstract base class for parsing bucket aggregations from OpenSearch responses. Provides common + * functionality for extracting key-value pairs from different types of buckets. + */ +public abstract class AbstractBucketAggregationParser + implements OpenSearchAggregationResponseParser { + /** + * Extracts key-value pairs from a composite aggregation bucket without processing its + * sub-aggregations. + * + *

For example, for the following CompositeAggregation bucket in response: + * + *

{@code
+   * {
+   *   "key": {
+   *     "firstname": "William",
+   *     "lastname": "Shakespeare"
+   *   },
+   *   "sub_agg_name": {
+   *     "buckets": []
+   *   }
+   * }
+   * }
+ * + * It returns {@code {"firstname": "William", "lastname": "Shakespeare"}} as the response. + * + * @param bucket the composite aggregation bucket to extract data from + * @return a map containing the bucket's key-value pairs + */ + protected Map extract(CompositeAggregation.Bucket bucket) { + return bucket.getKey(); + } + + /** + * Extracts key-value pairs from a range aggregation bucket without processing its + * sub-aggregations. + * + * @param bucket the range aggregation bucket to extract data from + * @param name the name to use as the key in the returned map + * @return a map containing the bucket's key mapped to the provided name + */ + protected Map extract(Range.Bucket bucket, String name) { + return Map.of(name, bucket.getKey()); + } + + @Override + public List> parse(SearchHits hits) { + throw new UnsupportedOperationException(this.getClass() + " doesn't support parse(SearchHits)"); + } +} diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/response/agg/BucketAggregationParser.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/response/agg/BucketAggregationParser.java index ca40ccccf9c..13721ecedd3 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/response/agg/BucketAggregationParser.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/response/agg/BucketAggregationParser.java @@ -5,27 +5,48 @@ package org.opensearch.sql.opensearch.response.agg; -import java.util.HashMap; import java.util.List; import java.util.Map; import lombok.EqualsAndHashCode; import lombok.Getter; -import org.opensearch.search.SearchHits; import org.opensearch.search.aggregations.Aggregation; import org.opensearch.search.aggregations.Aggregations; import org.opensearch.search.aggregations.bucket.MultiBucketsAggregation; import org.opensearch.search.aggregations.bucket.composite.CompositeAggregation; import org.opensearch.search.aggregations.bucket.range.Range; +/** + * Parser for bucket aggregations that contain 
sub-aggregations. This parser handles multiple levels + * of multi-bucket aggregations by delegating sublevels to sub-parsers. + * + *

Please note that it does not handle metric or value count responses -- they should be parsed + * only in {@link LeafBucketAggregationParser}. + */ @Getter -@EqualsAndHashCode -public class BucketAggregationParser implements OpenSearchAggregationResponseParser { - private final OpenSearchAggregationResponseParser subAggParser; +@EqualsAndHashCode(callSuper = false) +public class BucketAggregationParser extends AbstractBucketAggregationParser { + /** The sub-aggregation parser used to process nested aggregations within each bucket. */ + private final AbstractBucketAggregationParser subAggParser; - public BucketAggregationParser(OpenSearchAggregationResponseParser subAggParser) { + /** + * Constructs a new BucketAggregationParser with the specified sub-aggregation parser. + * + * @param subAggParser the parser to handle sublevel multi-bucket aggregations within each bucket + */ + public BucketAggregationParser(AbstractBucketAggregationParser subAggParser) { this.subAggParser = subAggParser; } + /** + * Parses the provided aggregations into a list of maps containing the aggregated data. This + * method handles multi-bucket aggregations by processing each bucket and merging the results with + * bucket-specific key information. 
+ * + * @param aggregations the aggregations to parse + * @return a list of maps containing the parsed aggregation data + * @throws IllegalStateException if the aggregation type is not supported or if the sub-parser + * type is invalid + */ @Override public List> parse(Aggregations aggregations) { if (subAggParser instanceof BucketAggregationParser) { @@ -51,21 +72,16 @@ public List> parse(Aggregations aggregations) { private List> parse(MultiBucketsAggregation.Bucket bucket, String name) { List> results = subAggParser.parse(bucket.getAggregations()); if (bucket instanceof CompositeAggregation.Bucket compositeBucket) { - Map common = new HashMap<>(compositeBucket.getKey()); + Map common = extract(compositeBucket); for (Map r : results) { r.putAll(common); } - } else if (bucket instanceof Range.Bucket) { + } else if (bucket instanceof Range.Bucket rangeBucket) { + Map common = extract(rangeBucket, name); for (Map r : results) { - r.put(name, bucket.getKey()); + r.putAll(common); } } return results; } - - @Override - public List> parse(SearchHits hits) { - throw new UnsupportedOperationException( - "BucketAggregationParser doesn't support parse(SearchHits)"); - } } diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/response/agg/LeafBucketAggregationParser.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/response/agg/LeafBucketAggregationParser.java index fb474461305..b02c5720bcf 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/response/agg/LeafBucketAggregationParser.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/response/agg/LeafBucketAggregationParser.java @@ -11,7 +11,6 @@ import java.util.Objects; import lombok.EqualsAndHashCode; import lombok.Getter; -import org.opensearch.search.SearchHits; import org.opensearch.search.aggregations.Aggregation; import org.opensearch.search.aggregations.Aggregations; import org.opensearch.search.aggregations.bucket.MultiBucketsAggregation; @@ -19,11 +18,13 @@ 
import org.opensearch.search.aggregations.bucket.range.Range; /** - * Use LeafBucketAggregationParser only when there is a single group-by key, it returns multiple - * buckets. {@link BucketAggregationParser} is used for multiple group by keys + * Parser for leaf-level bucket aggregations that may contain metric information but no nested + * multi-bucket aggregations. + * + *

For aggregations with nested bucket structures, use {@link BucketAggregationParser} instead. */ -@EqualsAndHashCode -public class LeafBucketAggregationParser implements OpenSearchAggregationResponseParser { +@EqualsAndHashCode(callSuper = false) +public class LeafBucketAggregationParser extends AbstractBucketAggregationParser { @Getter private final MetricParserHelper metricsParser; // countAggNameList dedicated the list of count aggregations which are filled by doc_count private List countAggNameList = List.of(); @@ -50,16 +51,10 @@ public List> parse(Aggregations aggregations) { .getBuckets().stream().map(b -> parse(b, agg.getName())).filter(Objects::nonNull).toList(); } - @Override - public List> parse(SearchHits hits) { - throw new UnsupportedOperationException( - "LeafBucketAggregationParser doesn't support parse(SearchHits)"); - } - private Map parse(MultiBucketsAggregation.Bucket bucket, String name) { Map result = metricsParser.parse(bucket.getAggregations()); if (bucket instanceof CompositeAggregation.Bucket compositeBucket) { - result.putAll(compositeBucket.getKey()); + result.putAll(extract(compositeBucket)); } else if (bucket instanceof Range.Bucket) { if (bucket.getDocCount() == 0) { return null; From d40c24435c3ca19c7b5f153a372dfd4dcbdd1412 Mon Sep 17 00:00:00 2001 From: Yuanchun Shen Date: Mon, 29 Sep 2025 11:29:31 +0800 Subject: [PATCH 16/26] Remove an unnecessary bucket agg in AggregationQueryBuilder Signed-off-by: Yuanchun Shen --- .../storage/script/aggregation/AggregationQueryBuilder.java | 3 +-- .../storage/scan/OpenSearchIndexScanOptimizationTest.java | 4 +--- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/aggregation/AggregationQueryBuilder.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/aggregation/AggregationQueryBuilder.java index 34b5bdd99e7..485639804f3 100644 --- 
a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/aggregation/AggregationQueryBuilder.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/aggregation/AggregationQueryBuilder.java @@ -29,7 +29,6 @@ import org.opensearch.sql.expression.ReferenceExpression; import org.opensearch.sql.expression.aggregation.NamedAggregator; import org.opensearch.sql.opensearch.data.type.OpenSearchDataType; -import org.opensearch.sql.opensearch.response.agg.BucketAggregationParser; import org.opensearch.sql.opensearch.response.agg.LeafBucketAggregationParser; import org.opensearch.sql.opensearch.response.agg.MetricParser; import org.opensearch.sql.opensearch.response.agg.NoBucketAggregationParser; @@ -96,7 +95,7 @@ public AggregationQueryBuilder(ExpressionSerializer serializer) { bucketNullable)) .subAggregations(metrics.getLeft()) .size(AGGREGATION_BUCKET_SIZE)), - new BucketAggregationParser(new LeafBucketAggregationParser(metrics.getRight()))); + new LeafBucketAggregationParser(metrics.getRight())); } } diff --git a/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/scan/OpenSearchIndexScanOptimizationTest.java b/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/scan/OpenSearchIndexScanOptimizationTest.java index 2618c5325b7..644ecdc33da 100644 --- a/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/scan/OpenSearchIndexScanOptimizationTest.java +++ b/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/scan/OpenSearchIndexScanOptimizationTest.java @@ -74,7 +74,6 @@ import org.opensearch.sql.expression.function.OpenSearchFunctions; import org.opensearch.sql.opensearch.data.type.OpenSearchDataType; import org.opensearch.sql.opensearch.request.OpenSearchRequestBuilder; -import org.opensearch.sql.opensearch.response.agg.BucketAggregationParser; import org.opensearch.sql.opensearch.response.agg.LeafBucketAggregationParser; import 
org.opensearch.sql.opensearch.response.agg.OpenSearchAggregationResponseParser; import org.opensearch.sql.opensearch.response.agg.SingleValueParser; @@ -804,8 +803,7 @@ private Runnable withAggregationPushedDown( .size(AggregationQueryBuilder.AGGREGATION_BUCKET_SIZE); List aggBuilders = Collections.singletonList(aggBuilder); responseParser = - new BucketAggregationParser( - new LeafBucketAggregationParser(new SingleValueParser(aggregation.aggregateName))); + new LeafBucketAggregationParser(new SingleValueParser(aggregation.aggregateName)); return () -> { verify(requestBuilder, times(1)).pushDownAggregation(Pair.of(aggBuilders, responseParser)); From 606e346aa279c74ea71135108d3aea9ee1b511d5 Mon Sep 17 00:00:00 2001 From: Yuanchun Shen Date: Mon, 29 Sep 2025 13:40:34 +0800 Subject: [PATCH 17/26] Test pushing down case where there exists null values Signed-off-by: Yuanchun Shen --- .../remote/CalcitePPLCaseFunctionIT.java | 30 ++++++++++++++++++- 1 file changed, 29 insertions(+), 1 deletion(-) diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLCaseFunctionIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLCaseFunctionIT.java index e545fccdaf7..febb9218a9c 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLCaseFunctionIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLCaseFunctionIT.java @@ -6,6 +6,7 @@ package org.opensearch.sql.calcite.remote; import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_BANK; +import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_STATE_COUNTRY_WITH_NULL; import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_WEBLOGS; import static org.opensearch.sql.util.MatcherUtils.closeTo; import static org.opensearch.sql.util.MatcherUtils.rows; @@ -28,7 +29,7 @@ public void init() throws Exception { loadIndex(Index.WEBLOG); loadIndex(Index.TIME_TEST_DATA); - 
loadIndex(Index.TIME_TEST_DATA_WITH_NULL); + loadIndex(Index.STATE_COUNTRY_WITH_NULL); loadIndex(Index.BANK); appendDataForBadResponse(); } @@ -442,4 +443,31 @@ public void testCaseCanBePushedDownAsCompositeRangeQuery() throws IOException { rows(5686.0, "hattiebond@netagy.com", "TN"), rows(16418.0, "elinorratliff@scentric.com", "WA")); } + + @Test + public void testCaseAggWithNullValues() throws IOException { + JSONObject actual = + executeQuery( + String.format( + "source=%s" + + "| eval age_category = case(" + + " age < 20, 'teenager'," + + " age < 70, 'adult'," + + " age >= 70, 'senior'" + + " else 'unknown')" + + "| stats avg(age) by age_category", + TEST_INDEX_STATE_COUNTRY_WITH_NULL)); + verifySchema(actual, schema("avg(age)", "double"), schema("age_category", "string")); + // TODO: There is such discrepancy because range aggregations will ignore null values + if (isPushdownDisabled()) { + verifyDataRows( + actual, + rows(10, "teenager"), + rows(25, "adult"), + rows(70, "senior"), + rows(null, "unknown")); + } else { + verifyDataRows(actual, rows(10, "teenager"), rows(25, "adult"), rows(70, "senior")); + } + } } From a5fdd662f93db8ec11bb97a9f7421f97e6b8bb6d Mon Sep 17 00:00:00 2001 From: Yuanchun Shen Date: Mon, 29 Sep 2025 14:34:25 +0800 Subject: [PATCH 18/26] Return empty in CaseRangeAnalyzer to unblock the rest pushdown - Additionally test number as result expressions Signed-off-by: Yuanchun Shen --- .../sql/calcite/remote/CalciteExplainIT.java | 13 ++++++++- .../calcite/agg_case_cannot_push.yaml | 6 +---- .../agg_case_composite_cannot_push.yaml | 6 +---- .../calcite/agg_case_num_res_cannot_push.yaml | 9 +++++++ .../agg_case_num_res_cannot_push.yaml | 13 +++++++++ .../opensearch/request/CaseRangeAnalyzer.java | 27 +++++++++++++++---- .../request/CaseRangeAnalyzerTest.java | 25 ++++++++++------- 7 files changed, 73 insertions(+), 26 deletions(-) create mode 100644 integ-test/src/test/resources/expectedOutput/calcite/agg_case_num_res_cannot_push.yaml create 
mode 100644 integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/agg_case_num_res_cannot_push.yaml diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java index f990c327ea2..b860a3dde45 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java @@ -943,6 +943,17 @@ public void testCasePushdownAsRangeQueryExplain() throws IOException { + " else 'u100') | stats avg(age) as avg_age by age_range", TEST_INDEX_BANK))); + // 1.6 Should not be pushed as range query because the result expression is not a string + // literal. + // Range aggregation keys must be strings + assertYamlEqualsJsonIgnoreId( + loadExpectedPlan("agg_case_num_res_cannot_push.yaml"), + explainQueryToString( + String.format( + "source=%s | eval age_range = case(age < 30, 30 else 100) | stats count() by" + + " age_range", + TEST_INDEX_BANK))); + // CASE 2: Composite - Range - Metric // 2.1 Composite (term) - Range - Metric assertYamlEqualsJsonIgnoreId( @@ -980,7 +991,7 @@ public void testCasePushdownAsRangeQueryExplain() throws IOException { + " avg_balance by age_range, balance_range, state", TEST_INDEX_BANK))); - // 2.5 Should not be pushed because case result expression is not constant + // 2.5 Should not be pushed down as range query because case result expression is not constant assertYamlEqualsJsonIgnoreId( loadExpectedPlan("agg_case_composite_cannot_push.yaml"), explainQueryToString( diff --git a/integ-test/src/test/resources/expectedOutput/calcite/agg_case_cannot_push.yaml b/integ-test/src/test/resources/expectedOutput/calcite/agg_case_cannot_push.yaml index 66e665267d3..02f92021453 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/agg_case_cannot_push.yaml +++ 
b/integ-test/src/test/resources/expectedOutput/calcite/agg_case_cannot_push.yaml @@ -6,8 +6,4 @@ calcite: LogicalProject(age_range=[CASE(<($10, 30), 'u30':VARCHAR, SEARCH($10, Sarg[[30..40]]), 'u40':VARCHAR, 'u100':VARCHAR)], age=[$10]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) physical: | - EnumerableLimit(fetch=[10000]) - EnumerableCalc(expr#0..2=[{inputs}], expr#3=[0], expr#4=[=($t2, $t3)], expr#5=[null:BIGINT], expr#6=[CASE($t4, $t5, $t1)], expr#7=[CAST($t6):DOUBLE], expr#8=[/($t7, $t2)], avg_age=[$t8], age_range=[$t0]) - EnumerableAggregate(group=[{0}], agg#0=[$SUM0($1)], agg#1=[COUNT($1)]) - EnumerableCalc(expr#0=[{inputs}], expr#1=[30], expr#2=[<($t0, $t1)], expr#3=['u30':VARCHAR], expr#4=[Sarg[[30..40]]], expr#5=[SEARCH($t0, $t4)], expr#6=['u40':VARCHAR], expr#7=['u100':VARCHAR], expr#8=[CASE($t2, $t3, $t5, $t6, $t7)], $f0=[$t8], age=[$t0]) - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[PROJECT->[age]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"timeout":"1m","_source":{"includes":["age"],"excludes":[]}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},avg_age=AVG($1)), PROJECT->[avg_age, age_range], LIMIT->10000], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"age_range":{"terms":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXNyABFqYXZhLnV0aWwuQ29sbFNlcleOq7Y6G6gRAwABSQADdGFneHAAAAADdwQAAAAGdAAHcm93VHlwZXQHrXsKICAiZmllbGRzIjogWwogICAgewogICAgICAidHlwZSI6ICJCSUdJTlQiLAogICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAibmFtZSI6ICJhY2NvdW50X251bWJlciIKICAgIH0sCiAgICB7CiAgICAgICJ0eXBlIjogIlZBUkNIQVIiLAogICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAicHJlY2lzaW9uIjogLTEsCiAgICAgICJuYW1lIjogImZpcnN0bmFtZSIKICAgIH0sCiAgICB7CiAgICAgICJ0eXBlIjogIlZBUkNIQVIiLAogICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAicHJlY2lzaW9uIjogLTEsCiAgICAgICJuYW1lIjogImFkZHJlc3MiCiAgICB9LAogICAgewogICAgICAidWR0IjogIkVYUFJfVElNRVNUQU1QIiwKICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICJwcmVjaXNpb24iOiAtMSwKICAgICAgIm5hbWUiOiAiYmlydGhkYXRlIgogICAgfSwKICAgIHsKICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICJwcmVjaXNpb24iOiAtMSwKICAgICAgIm5hbWUiOiAiZ2VuZGVyIgogICAgfSwKICAgIHsKICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICJwcmVjaXNpb24iOiAtMSwKICAgICAgIm5hbWUiOiAiY2l0eSIKICAgIH0sCiAgICB7CiAgICAgICJ0eXBlIjogIlZBUkNIQVIiLAogICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAicHJlY2lzaW9uIjogLTEsCiAgICAgICJuYW1lIjogImxhc3RuYW1lIgogICAgfSwKICAgIHsKICAgICAgInR5cGUiOiAiQklHSU5UIiwKICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgIm5hbWUiOiAiYmFsYW5jZSIKICAgIH0sCiAgICB7CiAgICAgICJ0eXBlIjogIlZBUkNIQVIiLAogICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAicHJlY2lzaW9uIjogLTEsCiAgICAgICJuYW1lIjogImVtcGxveWVyIgogICAgfSwKICAgIHsKICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICJwcmVjaXNpb24iOiAtMSwKICAgICAgIm5hbWUiOiAic3RhdGUiCiAgICB9LAogICAgewogICAgICAidHlwZSI6ICJJTlRFR0VSIiwKICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgIm5hbWUiOiAiYWdlIgogICAgfSwKICAgIHsKICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICJwcmVjaXNpb24iOiAtMSwKICAgICAg
Im5hbWUiOiAiZW1haWwiCiAgICB9LAogICAgewogICAgICAidHlwZSI6ICJCT09MRUFOIiwKICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgIm5hbWUiOiAibWFsZSIKICAgIH0sCiAgICB7CiAgICAgICJ0eXBlIjogIlZBUkNIQVIiLAogICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAicHJlY2lzaW9uIjogLTEsCiAgICAgICJuYW1lIjogIl9pZCIKICAgIH0sCiAgICB7CiAgICAgICJ0eXBlIjogIlZBUkNIQVIiLAogICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAicHJlY2lzaW9uIjogLTEsCiAgICAgICJuYW1lIjogIl9pbmRleCIKICAgIH0sCiAgICB7CiAgICAgICJ0eXBlIjogIlJFQUwiLAogICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAibmFtZSI6ICJfc2NvcmUiCiAgICB9LAogICAgewogICAgICAidHlwZSI6ICJSRUFMIiwKICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgIm5hbWUiOiAiX21heHNjb3JlIgogICAgfSwKICAgIHsKICAgICAgInR5cGUiOiAiQklHSU5UIiwKICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgIm5hbWUiOiAiX3NvcnQiCiAgICB9LAogICAgewogICAgICAidHlwZSI6ICJWQVJDSEFSIiwKICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgInByZWNpc2lvbiI6IC0xLAogICAgICAibmFtZSI6ICJfcm91dGluZyIKICAgIH0KICBdLAogICJudWxsYWJsZSI6IHRydWUKfXQABGV4cHJ0BXp7CiAgIm9wIjogewogICAgIm5hbWUiOiAiQ0FTRSIsCiAgICAia2luZCI6ICJDQVNFIiwKICAgICJzeW50YXgiOiAiU1BFQ0lBTCIKICB9LAogICJvcGVyYW5kcyI6IFsKICAgIHsKICAgICAgIm9wIjogewogICAgICAgICJuYW1lIjogIjwiLAogICAgICAgICJraW5kIjogIkxFU1NfVEhBTiIsCiAgICAgICAgInN5bnRheCI6ICJCSU5BUlkiCiAgICAgIH0sCiAgICAgICJvcGVyYW5kcyI6IFsKICAgICAgICB7CiAgICAgICAgICAiaW5wdXQiOiAxMCwKICAgICAgICAgICJuYW1lIjogIiQxMCIKICAgICAgICB9LAogICAgICAgIHsKICAgICAgICAgICJsaXRlcmFsIjogMzAsCiAgICAgICAgICAidHlwZSI6IHsKICAgICAgICAgICAgInR5cGUiOiAiSU5URUdFUiIsCiAgICAgICAgICAgICJudWxsYWJsZSI6IGZhbHNlCiAgICAgICAgICB9CiAgICAgICAgfQogICAgICBdCiAgICB9LAogICAgewogICAgICAibGl0ZXJhbCI6ICJ1MzAiLAogICAgICAidHlwZSI6IHsKICAgICAgICAidHlwZSI6ICJWQVJDSEFSIiwKICAgICAgICAibnVsbGFibGUiOiBmYWxzZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfSwKICAgIHsKICAgICAgIm9wIjogewogICAgICAgICJuYW1lIjogIlNFQVJDSCIsCiAgICAgICAgImtpbmQiOiAiU0VBUkNIIiwKICAgICAgICAic3ludGF4IjogIklOVEVSTkFMIgogICAgICB9LAogICAgICAib3BlcmFuZHMiOiBbCiAgICAgICAgewogICAgICAgICAgImlucHV0IjogMTAsCiAgICAgICAgICAibmFtZSI6ICIkMTAiCiAgICAgICAgfSwKICAgICAg
ICB7CiAgICAgICAgICAibGl0ZXJhbCI6IHsKICAgICAgICAgICAgInJhbmdlU2V0IjogWwogICAgICAgICAgICAgIFsKICAgICAgICAgICAgICAgICJjbG9zZWQiLAogICAgICAgICAgICAgICAgIjMwIiwKICAgICAgICAgICAgICAgICI0MCIKICAgICAgICAgICAgICBdCiAgICAgICAgICAgIF0sCiAgICAgICAgICAgICJudWxsQXMiOiAiVU5LTk9XTiIKICAgICAgICAgIH0sCiAgICAgICAgICAidHlwZSI6IHsKICAgICAgICAgICAgInR5cGUiOiAiSU5URUdFUiIsCiAgICAgICAgICAgICJudWxsYWJsZSI6IGZhbHNlCiAgICAgICAgICB9CiAgICAgICAgfQogICAgICBdCiAgICB9LAogICAgewogICAgICAibGl0ZXJhbCI6ICJ1NDAiLAogICAgICAidHlwZSI6IHsKICAgICAgICAidHlwZSI6ICJWQVJDSEFSIiwKICAgICAgICAibnVsbGFibGUiOiBmYWxzZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfSwKICAgIHsKICAgICAgImxpdGVyYWwiOiAidTEwMCIsCiAgICAgICJ0eXBlIjogewogICAgICAgICJ0eXBlIjogIlZBUkNIQVIiLAogICAgICAgICJudWxsYWJsZSI6IGZhbHNlLAogICAgICAgICJwcmVjaXNpb24iOiAtMQogICAgICB9CiAgICB9CiAgXQp9dAAKZmllbGRUeXBlc3NyABdqYXZhLnV0aWwuTGlua2VkSGFzaE1hcDTATlwQbMD7AgABWgALYWNjZXNzT3JkZXJ4cgARamF2YS51dGlsLkhhc2hNYXAFB9rBwxZg0QMAAkYACmxvYWRGYWN0b3JJAAl0aHJlc2hvbGR4cD9AAAAAAAAYdwgAAAAgAAAADXQADmFjY291bnRfbnVtYmVyfnIAKW9yZy5vcGVuc2VhcmNoLnNxbC5kYXRhLnR5cGUuRXhwckNvcmVUeXBlAAAAAAAAAAASAAB4cgAOamF2YS5sYW5nLkVudW0AAAAAAAAAABIAAHhwdAAETE9OR3QACWZpcnN0bmFtZX5xAH4AC3QABlNUUklOR3QAB2FkZHJlc3NzcgA6b3JnLm9wZW5zZWFyY2guc3FsLm9wZW5zZWFyY2guZGF0YS50eXBlLk9wZW5TZWFyY2hUZXh0VHlwZa2Do5ME4zFEAgABTAAGZmllbGRzdAAPTGphdmEvdXRpbC9NYXA7eHIAOm9yZy5vcGVuc2VhcmNoLnNxbC5vcGVuc2VhcmNoLmRhdGEudHlwZS5PcGVuU2VhcmNoRGF0YVR5cGXCY7zKAvoFNQIAA0wADGV4cHJDb3JlVHlwZXQAK0xvcmcvb3BlbnNlYXJjaC9zcWwvZGF0YS90eXBlL0V4cHJDb3JlVHlwZTtMAAttYXBwaW5nVHlwZXQASExvcmcvb3BlbnNlYXJjaC9zcWwvb3BlbnNlYXJjaC9kYXRhL3R5cGUvT3BlblNlYXJjaERhdGFUeXBlJE1hcHBpbmdUeXBlO0wACnByb3BlcnRpZXNxAH4AFHhwfnEAfgALdAAHVU5LTk9XTn5yAEZvcmcub3BlbnNlYXJjaC5zcWwub3BlbnNlYXJjaC5kYXRhLnR5cGUuT3BlblNlYXJjaERhdGFUeXBlJE1hcHBpbmdUeXBlAAAAAAAAAAASAAB4cQB+AAx0AARUZXh0c3IAPHNoYWRlZC5jb20uZ29vZ2xlLmNvbW1vbi5jb2xsZWN0LkltbXV0YWJsZU1hcCRTZXJpYWxpemVkRm9ybQAAAAAAAAAAAgACTAAEa2V5c3QAEkxqYXZhL2xhbmcvT2JqZWN0O0wABnZhbHVlc3EAfgAfeHB1cgATW0xqYXZhLmxhbmcuT2Jq
ZWN0O5DOWJ8QcylsAgAAeHAAAAAAdXEAfgAhAAAAAHNxAH4AAAAAAAN3BAAAAAB4dAAJYmlydGhkYXRlc3IAOm9yZy5vcGVuc2VhcmNoLnNxbC5vcGVuc2VhcmNoLmRhdGEudHlwZS5PcGVuU2VhcmNoRGF0ZVR5cGWeLVKuEH3KrwIAAUwAB2Zvcm1hdHN0ABBMamF2YS91dGlsL0xpc3Q7eHEAfgAVfnEAfgALdAAJVElNRVNUQU1QfnEAfgAbdAAERGF0ZXEAfgAgc3EAfgAAAAAAAXcEAAAAAHh0AAZnZW5kZXJzcQB+ABNxAH4AGXEAfgAccQB+ACBzcQB+AAAAAAADdwQAAAACdAAHa2V5d29yZHNxAH4AFXEAfgAQfnEAfgAbdAAHS2V5d29yZHEAfgAgeHQABGNpdHlxAH4AEHQACGxhc3RuYW1lcQB+ABB0AAdiYWxhbmNlcQB+AA10AAhlbXBsb3llcnNxAH4AE3EAfgAZcQB+ABxxAH4AIHEAfgAkdAAFc3RhdGVzcQB+ABNxAH4AGXEAfgAccQB+ACBzcQB+AAAAAAADdwQAAAACcQB+ADFxAH4AMnh0AANhZ2V+cQB+AAt0AAdJTlRFR0VSdAAFZW1haWxzcQB+ABNxAH4AGXEAfgAccQB+ACBxAH4AJHQABG1hbGV+cQB+AAt0AAdCT09MRUFOeAB4\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0}},"missing_bucket":true,"missing_order":"first","order":"asc"}}}]},"aggregations":{"avg_age":{"avg":{"field":"age"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/agg_case_composite_cannot_push.yaml b/integ-test/src/test/resources/expectedOutput/calcite/agg_case_composite_cannot_push.yaml index bd197c4f797..7b64abe160c 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/agg_case_composite_cannot_push.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/agg_case_composite_cannot_push.yaml @@ -6,8 +6,4 @@ calcite: LogicalProject(age_range=[CASE(<($10, 35), 'u35':VARCHAR, $11)], state=[$9], balance=[$7]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) physical: | - EnumerableLimit(fetch=[10000]) - EnumerableCalc(expr#0..3=[{inputs}], expr#4=[0], expr#5=[=($t3, $t4)], expr#6=[null:BIGINT], expr#7=[CASE($t5, $t6, $t2)], expr#8=[CAST($t7):DOUBLE], expr#9=[/($t8, $t3)], avg_balance=[$t9], age_range=[$t0], state=[$t1]) - EnumerableAggregate(group=[{0, 1}], agg#0=[$SUM0($2)], agg#1=[COUNT($2)]) - EnumerableCalc(expr#0..3=[{inputs}], expr#4=[35], expr#5=[<($t0, $t4)], 
expr#6=['u35':VARCHAR], expr#7=[CASE($t5, $t6, $t1)], $f0=[$t7], state=[$t2], balance=[$t3]) - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[PROJECT->[age, email, state, balance]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"timeout":"1m","_source":{"includes":["age","email","state","balance"],"excludes":[]}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1},avg_balance=AVG($2)), PROJECT->[avg_balance, age_range, state], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"age_range":{"terms":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXNyABFqYXZhLnV0aWwuQ29sbFNlcleOq7Y6G6gRAwABSQADdGFneHAAAAADdwQAAAAGdAAHcm93VHlwZXQHrXsKICAiZmllbGRzIjogWwogICAgewogICAgICAidHlwZSI6ICJCSUdJTlQiLAogICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAibmFtZSI6ICJhY2NvdW50X251bWJlciIKICAgIH0sCiAgICB7CiAgICAgICJ0eXBlIjogIlZBUkNIQVIiLAogICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAicHJlY2lzaW9uIjogLTEsCiAgICAgICJuYW1lIjogImZpcnN0bmFtZSIKICAgIH0sCiAgICB7CiAgICAgICJ0eXBlIjogIlZBUkNIQVIiLAogICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAicHJlY2lzaW9uIjogLTEsCiAgICAgICJuYW1lIjogImFkZHJlc3MiCiAgICB9LAogICAgewogICAgICAidWR0IjogIkVYUFJfVElNRVNUQU1QIiwKICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICJwcmVjaXNpb24iOiAtMSwKICAgICAgIm5hbWUiOiAiYmlydGhkYXRlIgogICAgfSwKICAgIHsKICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICJwcmVjaXNpb24iOiAtMSwKICAgICAgIm5hbWUiOiAiZ2VuZGVyIgogICAgfSwKICAgIHsKICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICJwcmVjaXNpb24iOiAtMSwKICAgICAgIm5hbWUiOiAiY2l0eSIKICAgIH0sCiAgICB7CiAgICAgICJ0eXBlIjogIlZBUkNIQVIiLAogICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAicHJ
lY2lzaW9uIjogLTEsCiAgICAgICJuYW1lIjogImxhc3RuYW1lIgogICAgfSwKICAgIHsKICAgICAgInR5cGUiOiAiQklHSU5UIiwKICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgIm5hbWUiOiAiYmFsYW5jZSIKICAgIH0sCiAgICB7CiAgICAgICJ0eXBlIjogIlZBUkNIQVIiLAogICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAicHJlY2lzaW9uIjogLTEsCiAgICAgICJuYW1lIjogImVtcGxveWVyIgogICAgfSwKICAgIHsKICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICJwcmVjaXNpb24iOiAtMSwKICAgICAgIm5hbWUiOiAic3RhdGUiCiAgICB9LAogICAgewogICAgICAidHlwZSI6ICJJTlRFR0VSIiwKICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgIm5hbWUiOiAiYWdlIgogICAgfSwKICAgIHsKICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICJwcmVjaXNpb24iOiAtMSwKICAgICAgIm5hbWUiOiAiZW1haWwiCiAgICB9LAogICAgewogICAgICAidHlwZSI6ICJCT09MRUFOIiwKICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgIm5hbWUiOiAibWFsZSIKICAgIH0sCiAgICB7CiAgICAgICJ0eXBlIjogIlZBUkNIQVIiLAogICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAicHJlY2lzaW9uIjogLTEsCiAgICAgICJuYW1lIjogIl9pZCIKICAgIH0sCiAgICB7CiAgICAgICJ0eXBlIjogIlZBUkNIQVIiLAogICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAicHJlY2lzaW9uIjogLTEsCiAgICAgICJuYW1lIjogIl9pbmRleCIKICAgIH0sCiAgICB7CiAgICAgICJ0eXBlIjogIlJFQUwiLAogICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAibmFtZSI6ICJfc2NvcmUiCiAgICB9LAogICAgewogICAgICAidHlwZSI6ICJSRUFMIiwKICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgIm5hbWUiOiAiX21heHNjb3JlIgogICAgfSwKICAgIHsKICAgICAgInR5cGUiOiAiQklHSU5UIiwKICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgIm5hbWUiOiAiX3NvcnQiCiAgICB9LAogICAgewogICAgICAidHlwZSI6ICJWQVJDSEFSIiwKICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgInByZWNpc2lvbiI6IC0xLAogICAgICAibmFtZSI6ICJfcm91dGluZyIKICAgIH0KICBdLAogICJudWxsYWJsZSI6IHRydWUKfXQABGV4cHJ0An97CiAgIm9wIjogewogICAgIm5hbWUiOiAiQ0FTRSIsCiAgICAia2luZCI6ICJDQVNFIiwKICAgICJzeW50YXgiOiAiU1BFQ0lBTCIKICB9LAogICJvcGVyYW5kcyI6IFsKICAgIHsKICAgICAgIm9wIjogewogICAgICAgICJuYW1lIjogIjwiLAogICAgICAgICJraW5kIjogIkxFU1NfVEhBTiIsCiAgICAgICAgInN5bnRheCI6ICJCSU5BUlkiCiAgICAgIH0sCiAgICAgICJvcGVyYW5kcyI6IFsKICAgICAgICB7CiAgICAgICAgICAiaW5wdXQiOiAxMCwKICAgICAgICAgICJuYW1lIjo
gIiQxMCIKICAgICAgICB9LAogICAgICAgIHsKICAgICAgICAgICJsaXRlcmFsIjogMzUsCiAgICAgICAgICAidHlwZSI6IHsKICAgICAgICAgICAgInR5cGUiOiAiSU5URUdFUiIsCiAgICAgICAgICAgICJudWxsYWJsZSI6IGZhbHNlCiAgICAgICAgICB9CiAgICAgICAgfQogICAgICBdCiAgICB9LAogICAgewogICAgICAibGl0ZXJhbCI6ICJ1MzUiLAogICAgICAidHlwZSI6IHsKICAgICAgICAidHlwZSI6ICJWQVJDSEFSIiwKICAgICAgICAibnVsbGFibGUiOiBmYWxzZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfSwKICAgIHsKICAgICAgImlucHV0IjogMTEsCiAgICAgICJuYW1lIjogIiQxMSIKICAgIH0KICBdCn10AApmaWVsZFR5cGVzc3IAF2phdmEudXRpbC5MaW5rZWRIYXNoTWFwNMBOXBBswPsCAAFaAAthY2Nlc3NPcmRlcnhyABFqYXZhLnV0aWwuSGFzaE1hcAUH2sHDFmDRAwACRgAKbG9hZEZhY3RvckkACXRocmVzaG9sZHhwP0AAAAAAABh3CAAAACAAAAANdAAOYWNjb3VudF9udW1iZXJ+cgApb3JnLm9wZW5zZWFyY2guc3FsLmRhdGEudHlwZS5FeHByQ29yZVR5cGUAAAAAAAAAABIAAHhyAA5qYXZhLmxhbmcuRW51bQAAAAAAAAAAEgAAeHB0AARMT05HdAAJZmlyc3RuYW1lfnEAfgALdAAGU1RSSU5HdAAHYWRkcmVzc3NyADpvcmcub3BlbnNlYXJjaC5zcWwub3BlbnNlYXJjaC5kYXRhLnR5cGUuT3BlblNlYXJjaFRleHRUeXBlrYOjkwTjMUQCAAFMAAZmaWVsZHN0AA9MamF2YS91dGlsL01hcDt4cgA6b3JnLm9wZW5zZWFyY2guc3FsLm9wZW5zZWFyY2guZGF0YS50eXBlLk9wZW5TZWFyY2hEYXRhVHlwZcJjvMoC+gU1AgADTAAMZXhwckNvcmVUeXBldAArTG9yZy9vcGVuc2VhcmNoL3NxbC9kYXRhL3R5cGUvRXhwckNvcmVUeXBlO0wAC21hcHBpbmdUeXBldABITG9yZy9vcGVuc2VhcmNoL3NxbC9vcGVuc2VhcmNoL2RhdGEvdHlwZS9PcGVuU2VhcmNoRGF0YVR5cGUkTWFwcGluZ1R5cGU7TAAKcHJvcGVydGllc3EAfgAUeHB+cQB+AAt0AAdVTktOT1dOfnIARm9yZy5vcGVuc2VhcmNoLnNxbC5vcGVuc2VhcmNoLmRhdGEudHlwZS5PcGVuU2VhcmNoRGF0YVR5cGUkTWFwcGluZ1R5cGUAAAAAAAAAABIAAHhxAH4ADHQABFRleHRzcgA8c2hhZGVkLmNvbS5nb29nbGUuY29tbW9uLmNvbGxlY3QuSW1tdXRhYmxlTWFwJFNlcmlhbGl6ZWRGb3JtAAAAAAAAAAACAAJMAARrZXlzdAASTGphdmEvbGFuZy9PYmplY3Q7TAAGdmFsdWVzcQB+AB94cHVyABNbTGphdmEubGFuZy5PYmplY3Q7kM5YnxBzKWwCAAB4cAAAAAB1cQB+ACEAAAAAc3EAfgAAAAAAA3cEAAAAAHh0AAliaXJ0aGRhdGVzcgA6b3JnLm9wZW5zZWFyY2guc3FsLm9wZW5zZWFyY2guZGF0YS50eXBlLk9wZW5TZWFyY2hEYXRlVHlwZZ4tUq4QfcqvAgABTAAHZm9ybWF0c3QAEExqYXZhL3V0aWwvTGlzdDt4cQB+ABV+cQB+AAt0AAlUSU1FU1RBTVB+cQB+ABt0AAREYXRlcQB+ACBzcQB+AAAAAAABdwQAAAAAeHQABmdlbmRlcnNxAH4AE3EAfgAZcQB+ABxxAH4
AIHNxAH4AAAAAAAN3BAAAAAJ0AAdrZXl3b3Jkc3EAfgAVcQB+ABB+cQB+ABt0AAdLZXl3b3JkcQB+ACB4dAAEY2l0eXEAfgAQdAAIbGFzdG5hbWVxAH4AEHQAB2JhbGFuY2VxAH4ADXQACGVtcGxveWVyc3EAfgATcQB+ABlxAH4AHHEAfgAgcQB+ACR0AAVzdGF0ZXNxAH4AE3EAfgAZcQB+ABxxAH4AIHNxAH4AAAAAAAN3BAAAAAJxAH4AMXEAfgAyeHQAA2FnZX5xAH4AC3QAB0lOVEVHRVJ0AAVlbWFpbHNxAH4AE3EAfgAZcQB+ABxxAH4AIHEAfgAkdAAEbWFsZX5xAH4AC3QAB0JPT0xFQU54AHg=\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0}},"missing_bucket":true,"missing_order":"first","order":"asc"}}},{"state":{"terms":{"field":"state.keyword","missing_bucket":true,"missing_order":"first","order":"asc"}}}]},"aggregations":{"avg_balance":{"avg":{"field":"balance"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/agg_case_num_res_cannot_push.yaml b/integ-test/src/test/resources/expectedOutput/calcite/agg_case_num_res_cannot_push.yaml new file mode 100644 index 00000000000..544bacdcc9c --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite/agg_case_num_res_cannot_push.yaml @@ -0,0 +1,9 @@ +calcite: + logical: | + LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(count()=[$1], age_range=[$0]) + LogicalAggregate(group=[{0}], count()=[COUNT()]) + LogicalProject(age_range=[CASE(<($10, 30), 30, 100)]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) + physical: | + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},count()=COUNT()), PROJECT->[count(), age_range], LIMIT->10000], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"age_range":{"terms":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXNyABFqYXZhLnV0aWwuQ29sbFNlcleOq7Y6G6gRAwABSQADdGFneHAAAAADdwQAAAAGdAAHcm93VHlwZXQHrXsKICAiZmllbGRzIjogWwogICAgewogICAgICAidHlwZSI6ICJCSUdJTlQiLAogICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAibmFtZSI6ICJhY2NvdW50X251bWJlciIKICAgIH0sCiAgICB7CiAgICAgICJ0eXBlIjogIlZBUkNIQVIiLAogICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAicHJlY2lzaW9uIjogLTEsCiAgICAgICJuYW1lIjogImZpcnN0bmFtZSIKICAgIH0sCiAgICB7CiAgICAgICJ0eXBlIjogIlZBUkNIQVIiLAogICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAicHJlY2lzaW9uIjogLTEsCiAgICAgICJuYW1lIjogImFkZHJlc3MiCiAgICB9LAogICAgewogICAgICAidWR0IjogIkVYUFJfVElNRVNUQU1QIiwKICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICJwcmVjaXNpb24iOiAtMSwKICAgICAgIm5hbWUiOiAiYmlydGhkYXRlIgogICAgfSwKICAgIHsKICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICJwcmVjaXNpb24iOiAtMSwKICAgICAgIm5hbWUiOiAiZ2VuZGVyIgogICAgfSwKICAgIHsKICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICJwcmVjaXNpb24iOiAtMSwKICAgICAgIm5hbWUiOiAiY2l0eSIKICAgIH0sCiAgICB7CiAgICAgICJ0eXBlIjogIlZBUkNIQVIiLAogICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAicHJlY2lzaW9uIjogLTEsCiAgICAgICJuYW1lIjogImxhc3RuYW1lIgogICAgfSwKICAgIHsKICAgICAgInR5cGUiOiAiQklHSU5UIiwKICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgIm5hbWUiOiAiYmFsYW5jZSIKICAgIH0sCiAgICB7CiAgICAgICJ0eXBlIjogIlZBUkNIQVIiLAogICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAicHJlY2lzaW9uIjogLTEsCiAgICAgICJuYW1lIjogImVtcGxveWVyIgogICAgfSwKICAgIHsKICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICJwcmVjaXNpb24iOiAtMSwKICAgICAgIm5hbWUiOiAic3RhdGUiCiAgICB9LAogICAgewogICAgICAidHlwZSI6ICJJTlRFR0VSIiwKICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgIm5hbWUiOiAiYWdlIgogICAgfSwKICAgIHsKICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICJwcmVjaXNpb24iOiAtMSwKICAgICAg
Im5hbWUiOiAiZW1haWwiCiAgICB9LAogICAgewogICAgICAidHlwZSI6ICJCT09MRUFOIiwKICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgIm5hbWUiOiAibWFsZSIKICAgIH0sCiAgICB7CiAgICAgICJ0eXBlIjogIlZBUkNIQVIiLAogICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAicHJlY2lzaW9uIjogLTEsCiAgICAgICJuYW1lIjogIl9pZCIKICAgIH0sCiAgICB7CiAgICAgICJ0eXBlIjogIlZBUkNIQVIiLAogICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAicHJlY2lzaW9uIjogLTEsCiAgICAgICJuYW1lIjogIl9pbmRleCIKICAgIH0sCiAgICB7CiAgICAgICJ0eXBlIjogIlJFQUwiLAogICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAibmFtZSI6ICJfc2NvcmUiCiAgICB9LAogICAgewogICAgICAidHlwZSI6ICJSRUFMIiwKICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgIm5hbWUiOiAiX21heHNjb3JlIgogICAgfSwKICAgIHsKICAgICAgInR5cGUiOiAiQklHSU5UIiwKICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgIm5hbWUiOiAiX3NvcnQiCiAgICB9LAogICAgewogICAgICAidHlwZSI6ICJWQVJDSEFSIiwKICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgInByZWNpc2lvbiI6IC0xLAogICAgICAibmFtZSI6ICJfcm91dGluZyIKICAgIH0KICBdLAogICJudWxsYWJsZSI6IHRydWUKfXQABGV4cHJ0Ap97CiAgIm9wIjogewogICAgIm5hbWUiOiAiQ0FTRSIsCiAgICAia2luZCI6ICJDQVNFIiwKICAgICJzeW50YXgiOiAiU1BFQ0lBTCIKICB9LAogICJvcGVyYW5kcyI6IFsKICAgIHsKICAgICAgIm9wIjogewogICAgICAgICJuYW1lIjogIjwiLAogICAgICAgICJraW5kIjogIkxFU1NfVEhBTiIsCiAgICAgICAgInN5bnRheCI6ICJCSU5BUlkiCiAgICAgIH0sCiAgICAgICJvcGVyYW5kcyI6IFsKICAgICAgICB7CiAgICAgICAgICAiaW5wdXQiOiAxMCwKICAgICAgICAgICJuYW1lIjogIiQxMCIKICAgICAgICB9LAogICAgICAgIHsKICAgICAgICAgICJsaXRlcmFsIjogMzAsCiAgICAgICAgICAidHlwZSI6IHsKICAgICAgICAgICAgInR5cGUiOiAiSU5URUdFUiIsCiAgICAgICAgICAgICJudWxsYWJsZSI6IGZhbHNlCiAgICAgICAgICB9CiAgICAgICAgfQogICAgICBdCiAgICB9LAogICAgewogICAgICAibGl0ZXJhbCI6IDMwLAogICAgICAidHlwZSI6IHsKICAgICAgICAidHlwZSI6ICJJTlRFR0VSIiwKICAgICAgICAibnVsbGFibGUiOiBmYWxzZQogICAgICB9CiAgICB9LAogICAgewogICAgICAibGl0ZXJhbCI6IDEwMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiSU5URUdFUiIsCiAgICAgICAgIm51bGxhYmxlIjogZmFsc2UKICAgICAgfQogICAgfQogIF0KfXQACmZpZWxkVHlwZXNzcgAXamF2YS51dGlsLkxpbmtlZEhhc2hNYXA0wE5cEGzA+wIAAVoAC2FjY2Vzc09yZGVyeHIAEWphdmEudXRpbC5IYXNoTWFwBQfawcMWYNEDAAJGAApsb2FkRmFjdG9ySQAJdGhy
ZXNob2xkeHA/QAAAAAAAGHcIAAAAIAAAAA10AA5hY2NvdW50X251bWJlcn5yAClvcmcub3BlbnNlYXJjaC5zcWwuZGF0YS50eXBlLkV4cHJDb3JlVHlwZQAAAAAAAAAAEgAAeHIADmphdmEubGFuZy5FbnVtAAAAAAAAAAASAAB4cHQABExPTkd0AAlmaXJzdG5hbWV+cQB+AAt0AAZTVFJJTkd0AAdhZGRyZXNzc3IAOm9yZy5vcGVuc2VhcmNoLnNxbC5vcGVuc2VhcmNoLmRhdGEudHlwZS5PcGVuU2VhcmNoVGV4dFR5cGWtg6OTBOMxRAIAAUwABmZpZWxkc3QAD0xqYXZhL3V0aWwvTWFwO3hyADpvcmcub3BlbnNlYXJjaC5zcWwub3BlbnNlYXJjaC5kYXRhLnR5cGUuT3BlblNlYXJjaERhdGFUeXBlwmO8ygL6BTUCAANMAAxleHByQ29yZVR5cGV0ACtMb3JnL29wZW5zZWFyY2gvc3FsL2RhdGEvdHlwZS9FeHByQ29yZVR5cGU7TAALbWFwcGluZ1R5cGV0AEhMb3JnL29wZW5zZWFyY2gvc3FsL29wZW5zZWFyY2gvZGF0YS90eXBlL09wZW5TZWFyY2hEYXRhVHlwZSRNYXBwaW5nVHlwZTtMAApwcm9wZXJ0aWVzcQB+ABR4cH5xAH4AC3QAB1VOS05PV05+cgBGb3JnLm9wZW5zZWFyY2guc3FsLm9wZW5zZWFyY2guZGF0YS50eXBlLk9wZW5TZWFyY2hEYXRhVHlwZSRNYXBwaW5nVHlwZQAAAAAAAAAAEgAAeHEAfgAMdAAEVGV4dHNyADxzaGFkZWQuY29tLmdvb2dsZS5jb21tb24uY29sbGVjdC5JbW11dGFibGVNYXAkU2VyaWFsaXplZEZvcm0AAAAAAAAAAAIAAkwABGtleXN0ABJMamF2YS9sYW5nL09iamVjdDtMAAZ2YWx1ZXNxAH4AH3hwdXIAE1tMamF2YS5sYW5nLk9iamVjdDuQzlifEHMpbAIAAHhwAAAAAHVxAH4AIQAAAABzcQB+AAAAAAADdwQAAAAAeHQACWJpcnRoZGF0ZXNyADpvcmcub3BlbnNlYXJjaC5zcWwub3BlbnNlYXJjaC5kYXRhLnR5cGUuT3BlblNlYXJjaERhdGVUeXBlni1SrhB9yq8CAAFMAAdmb3JtYXRzdAAQTGphdmEvdXRpbC9MaXN0O3hxAH4AFX5xAH4AC3QACVRJTUVTVEFNUH5xAH4AG3QABERhdGVxAH4AIHNxAH4AAAAAAAF3BAAAAAB4dAAGZ2VuZGVyc3EAfgATcQB+ABlxAH4AHHEAfgAgc3EAfgAAAAAAA3cEAAAAAnQAB2tleXdvcmRzcQB+ABVxAH4AEH5xAH4AG3QAB0tleXdvcmRxAH4AIHh0AARjaXR5cQB+ABB0AAhsYXN0bmFtZXEAfgAQdAAHYmFsYW5jZXEAfgANdAAIZW1wbG95ZXJzcQB+ABNxAH4AGXEAfgAccQB+ACBxAH4AJHQABXN0YXRlc3EAfgATcQB+ABlxAH4AHHEAfgAgc3EAfgAAAAAAA3cEAAAAAnEAfgAxcQB+ADJ4dAADYWdlfnEAfgALdAAHSU5URUdFUnQABWVtYWlsc3EAfgATcQB+ABlxAH4AHHEAfgAgcQB+ACR0AARtYWxlfnEAfgALdAAHQk9PTEVBTngAeA==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0}},"missing_bucket":true,"missing_order":"first","order":"asc"}}}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git 
a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/agg_case_num_res_cannot_push.yaml b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/agg_case_num_res_cannot_push.yaml new file mode 100644 index 00000000000..46035abe925 --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/agg_case_num_res_cannot_push.yaml @@ -0,0 +1,13 @@ +calcite: + logical: | + LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(count()=[$1], age_range=[$0]) + LogicalAggregate(group=[{0}], count()=[COUNT()]) + LogicalProject(age_range=[CASE(<($10, 30), 30, 100)]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) + physical: | + EnumerableLimit(fetch=[10000]) + EnumerableCalc(expr#0..1=[{inputs}], count()=[$t1], age_range=[$t0]) + EnumerableAggregate(group=[{0}], count()=[COUNT()]) + EnumerableCalc(expr#0..18=[{inputs}], expr#19=[30], expr#20=[<($t10, $t19)], expr#21=[100], expr#22=[CASE($t20, $t19, $t21)], age_range=[$t22]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/request/CaseRangeAnalyzer.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/request/CaseRangeAnalyzer.java index a33fca19e22..104ab04e547 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/request/CaseRangeAnalyzer.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/request/CaseRangeAnalyzer.java @@ -77,12 +77,12 @@ public Optional analyze(RexCall caseCall) { for (int i = 0; i < operands.size() - 1; i += 2) { RexNode condition = operands.get(i); RexNode expr = operands.get(i + 1); - // Result must be a literal - if (!(expr instanceof RexLiteral)) { + try { + String key = parseLiteralAsString(expr); + analyzeCondition(condition, key); + } catch (UnsupportedOperationException e) { return Optional.empty(); } - String key = ((RexLiteral) expr).getValueAs(String.class); - 
analyzeCondition(condition, key); } // Check ELSE clause @@ -92,7 +92,11 @@ public Optional analyze(RexCall caseCall) { // range key doesn't support values of type: VALUE_NULL elseKey = DEFAULT_ELSE_KEY; } else { - elseKey = ((RexLiteral) elseExpr).getValueAs(String.class); + try { + elseKey = parseLiteralAsString(elseExpr); + } catch (UnsupportedOperationException e) { + return Optional.empty(); + } } addRangeSet(elseKey, takenRange.complement()); return Optional.of(builder); @@ -262,6 +266,19 @@ private static void validateRange(Range range) { } } + private static String parseLiteralAsString(RexNode node) { + if (!(node instanceof RexLiteral)) { + throwUnsupported("Result expressions of range queries must be literals"); + } + RexLiteral literal = (RexLiteral) node; + try { + return literal.getValueAs(String.class); + } catch (AssertionError ignore) { + } + throw new UnsupportedOperationException( + "Cannot parse result expression of type " + literal.getType()); + } + private static void throwUnsupported() { throw new UnsupportedOperationException("Cannot create range aggregator from case"); } diff --git a/opensearch/src/test/java/org/opensearch/sql/opensearch/request/CaseRangeAnalyzerTest.java b/opensearch/src/test/java/org/opensearch/sql/opensearch/request/CaseRangeAnalyzerTest.java index 43af091b367..505db011f7b 100644 --- a/opensearch/src/test/java/org/opensearch/sql/opensearch/request/CaseRangeAnalyzerTest.java +++ b/opensearch/src/test/java/org/opensearch/sql/opensearch/request/CaseRangeAnalyzerTest.java @@ -7,7 +7,6 @@ import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertFalse; -import static org.junit.jupiter.api.Assertions.assertThrows; import static org.junit.jupiter.api.Assertions.assertTrue; import com.google.common.collect.Range; @@ -295,7 +294,7 @@ void testAnalyzeWithNonLiteralResultShouldNotSucceed() { } @Test - void testAnalyzeDifferentFieldsShouldThrow() { + void 
testAnalyzeDifferentFieldsShouldReturnEmpty() { // Test comparing different fields in conditions RexInputRef nameFieldRef = rexBuilder.makeInputRef(rowType.getFieldList().get(1).getType(), 1); @@ -321,12 +320,13 @@ void testAnalyzeDifferentFieldsShouldThrow() { Arrays.asList(condition1, result1, condition2, result2, elseResult)); CaseRangeAnalyzer analyzer = CaseRangeAnalyzer.create("test", rowType); + Optional result = analyzer.analyze(caseCall); - assertThrows(UnsupportedOperationException.class, () -> analyzer.analyze(caseCall)); + assertFalse(result.isPresent()); } @Test - void testAnalyzeWithAndConditionShouldThrow() { + void testAnalyzeWithAndConditionShouldReturnEmpty() { // Test AND condition which should be unsupported RexLiteral literal18 = rexBuilder.makeExactLiteral(BigDecimal.valueOf(18)); RexLiteral literal65 = rexBuilder.makeExactLiteral(BigDecimal.valueOf(65)); @@ -347,12 +347,13 @@ void testAnalyzeWithAndConditionShouldThrow() { SqlStdOperatorTable.CASE, Arrays.asList(andCondition, resultLiteral, elseLiteral)); CaseRangeAnalyzer analyzer = CaseRangeAnalyzer.create("test", rowType); + Optional result = analyzer.analyze(caseCall); - assertThrows(UnsupportedOperationException.class, () -> analyzer.analyze(caseCall)); + assertFalse(result.isPresent()); } @Test - void testAnalyzeWithOrConditionShouldThrow() { + void testAnalyzeWithOrConditionShouldReturnEmpty() { // Test OR condition which should be unsupported RexLiteral literal18 = rexBuilder.makeExactLiteral(BigDecimal.valueOf(18)); RexLiteral literal65 = rexBuilder.makeExactLiteral(BigDecimal.valueOf(65)); @@ -373,8 +374,9 @@ void testAnalyzeWithOrConditionShouldThrow() { SqlStdOperatorTable.CASE, Arrays.asList(orCondition, resultLiteral, elseLiteral)); CaseRangeAnalyzer analyzer = CaseRangeAnalyzer.create("test", rowType); + Optional result = analyzer.analyze(caseCall); - assertThrows(UnsupportedOperationException.class, () -> analyzer.analyze(caseCall)); + assertFalse(result.isPresent()); } @Test 
@@ -395,8 +397,9 @@ void testAnalyzeWithUnsupportedComparison() { SqlStdOperatorTable.CASE, Arrays.asList(condition, resultLiteral, elseLiteral)); CaseRangeAnalyzer analyzer = CaseRangeAnalyzer.create("test", rowType); + Optional result = analyzer.analyze(caseCall); - assertThrows(UnsupportedOperationException.class, () -> analyzer.analyze(caseCall)); + assertFalse(result.isPresent()); } @Test @@ -464,8 +467,9 @@ void testAnalyzeWithNullLiteralValue() { SqlStdOperatorTable.CASE, Arrays.asList(condition, resultLiteral, elseLiteral)); CaseRangeAnalyzer analyzer = CaseRangeAnalyzer.create("test", rowType); + Optional result = analyzer.analyze(caseCall); - assertThrows(UnsupportedOperationException.class, () -> analyzer.analyze(caseCall)); + assertFalse(result.isPresent()); } @Test @@ -833,7 +837,8 @@ void testAnalyzeSearchConditionWithInvalidField() { SqlStdOperatorTable.CASE, Arrays.asList(searchCall, resultLiteral, elseLiteral)); CaseRangeAnalyzer analyzer = CaseRangeAnalyzer.create("test", rowType); + Optional result = analyzer.analyze(caseCall); - assertThrows(UnsupportedOperationException.class, () -> analyzer.analyze(caseCall)); + assertFalse(result.isPresent()); } } From fdb9886320a940c2aa110e59ae4fbe3283a32563 Mon Sep 17 00:00:00 2001 From: Yuanchun Shen Date: Mon, 29 Sep 2025 17:12:01 +0800 Subject: [PATCH 19/26] Document limitations of pushding case as range queries Signed-off-by: Yuanchun Shen --- docs/user/ppl/functions/condition.rst | 10 ++++++++++ .../sql/calcite/remote/CalcitePPLCaseFunctionIT.java | 2 +- 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/docs/user/ppl/functions/condition.rst b/docs/user/ppl/functions/condition.rst index a2d10a014b3..817ad3fdabf 100644 --- a/docs/user/ppl/functions/condition.rst +++ b/docs/user/ppl/functions/condition.rst @@ -227,6 +227,16 @@ Argument type: all the supported data type, (NOTE : there is no comma before "el Return type: any +Limitations +>>>>>>>>>>> + +When each condition is a field comparison 
with a numeric literal and each result expression is a string literal, the query will be optimized as `range aggregations `_ if pushdown optimization is enabled. However, this optimization has the following limitations: + +- Null values will not be grouped into any bucket of a range aggregation and will be ignored +- The default ELSE clause will use the string literal ``"null"`` instead of actual NULL values + +To avoid these edge-case limitations, set ``plugins.calcite.pushdown.enabled`` to false. + Example:: os> source=accounts | eval result = case(age > 35, firstname, age < 30, lastname else employer) | fields result, firstname, lastname, age, employer diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLCaseFunctionIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLCaseFunctionIT.java index febb9218a9c..719ed9c39c8 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLCaseFunctionIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLCaseFunctionIT.java @@ -458,7 +458,7 @@ public void testCaseAggWithNullValues() throws IOException { + "| stats avg(age) by age_category", TEST_INDEX_STATE_COUNTRY_WITH_NULL)); verifySchema(actual, schema("avg(age)", "double"), schema("age_category", "string")); - // TODO: There is such discrepancy because range aggregations will ignore null values + // There is such discrepancy because range aggregations will ignore null values if (isPushdownDisabled()) { verifyDataRows( actual, From 0ca81aaee5114c71e446599aa820fde073a74b31 Mon Sep 17 00:00:00 2001 From: Yuanchun Shen Date: Sat, 11 Oct 2025 17:34:18 +0800 Subject: [PATCH 20/26] Make case pushdown a private method Signed-off-by: Yuanchun Shen --- .../opensearch/request/AggregateAnalyzer.java | 94 +++++++++++++------ 1 file changed, 63 insertions(+), 31 deletions(-) diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/request/AggregateAnalyzer.java 
b/opensearch/src/main/java/org/opensearch/sql/opensearch/request/AggregateAnalyzer.java index 4d8aa1e32db..56e50a6151a 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/request/AggregateAnalyzer.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/request/AggregateAnalyzer.java @@ -211,14 +211,6 @@ public static Pair, OpenSearchAggregationResponseParser Builder metricBuilder = builderAndParser.getLeft(); List metricParsers = builderAndParser.getRight(); - List> groupsByCase = - analyzeCaseInProject(groupList, project, rowType); - // Remove groups that are converted to ranges from groupList - Set toRemoveFromGroupList = - groupsByCase.stream().map(Pair::getLeft).collect(Collectors.toSet()); - // The group-by list after removing CASE that can be converted to range queries - groupList = groupList.stream().filter(i -> !toRemoveFromGroupList.contains(i)).toList(); - // both count() and count(FIELD) can apply doc_count optimization in non-bucket aggregation, // but only count() can apply doc_count optimization in bucket aggregation. boolean countAllOnly = !groupList.isEmpty(); @@ -227,24 +219,13 @@ public static Pair, OpenSearchAggregationResponseParser Builder newMetricBuilder = countAggNameAndBuilderPair.getRight(); List countAggNames = countAggNameAndBuilderPair.getLeft(); - // Cascade aggregations in such a way: - // RangeAggregation - // ...Any other range aggregations - // Metric Aggregation comes at last - // Note that but a composite aggregation can not be a sub aggregation of range aggregation, - // but range aggregation can be a sub aggregation of a composite aggregation. 
- AggregationBuilder rangeAggregationBuilder = null; - if (!groupsByCase.isEmpty()) { - for (int i = 0; i < groupsByCase.size(); i++) { - Pair p = groupsByCase.get(i); - if (i == 0) { - rangeAggregationBuilder = p.getRight(); - } else { - groupsByCase.get(i - 1).getRight().subAggregation(p.getRight()); - } - } - groupsByCase.getLast().getRight().subAggregations(newMetricBuilder); - } + Pair, AggregationBuilder> caseAggPushedAndRangeBuilder = + pushCaseAsRanges(groupList, project, rowType, newMetricBuilder); + // Remove groups that are converted to ranges from groupList + Set aggPushedAsRanges = caseAggPushedAndRangeBuilder.getLeft(); + AggregationBuilder rangeAggregationBuilder = caseAggPushedAndRangeBuilder.getRight(); + // The group-by list after removing CASE that can be converted to range queries + groupList = groupList.stream().filter(i -> !aggPushedAsRanges.contains(i)).toList(); // The top-level query is a range query: // - stats avg() by range_field @@ -252,7 +233,7 @@ public static Pair, OpenSearchAggregationResponseParser // - stats avg(), count() by range_field // RangeAgg // Metric - if (!groupsByCase.isEmpty() && groupList.isEmpty()) { + if (!aggPushedAsRanges.isEmpty() && groupList.isEmpty()) { return Pair.of( List.of(rangeAggregationBuilder), new BucketAggregationParser(metricParsers, countAggNames)); @@ -292,7 +273,7 @@ && isAutoDateSpan( // CompositeAgg // RangeAgg // Metric - else if (!groupsByCase.isEmpty()) { + else if (!aggPushedAsRanges.isEmpty()) { List> buckets = createCompositeBuckets(groupList, project, helper); return Pair.of( @@ -381,8 +362,37 @@ private static boolean supportCountFiled( == 1; } - private static List> analyzeCaseInProject( - List groupList, Project project, RelDataType rowType) { + /** + * Analyzes and converts CASE expressions in GROUP BY clauses to OpenSearch range aggregations. + * + *

This method identifies group by fields that are derived from CASE functions and transforms + * them into range aggregation builders. The resulting aggregations are cascaded in a hierarchical + * structure where range aggregations contain other range aggregations as sub-aggregations, with + * metric aggregations placed at the deepest level. + * + *

The aggregation hierarchy follows this pattern: + * + *

+   * RangeAggregation
+   *   └── RangeAggregation (nested)
+   *       └── ... (more range aggregations)
+   *           └── Metric Aggregation (at the bottom)
+   * 
+ * + * @param groupList the list of group by field indices from the query + * @param project the projection containing the expressions to analyze, may be null + * @param rowType the data type information for the current row structure + * @param metricBuilder the metric aggregation builder to be placed at the bottom of the hierarchy + * @return a pair containing: + *
    + *
  • A set of integers representing the indices of group fields that were successfully + * converted to range aggregations + *
  • The root range aggregation builder, or null if no CASE expressions were found or + * converted + *
+ */ + private static Pair, AggregationBuilder> pushCaseAsRanges( + List groupList, Project project, RelDataType rowType, Builder metricBuilder) { // Find group by fields derived from CASE functions and convert them to range queries List> groupsByCase = groupList.stream() @@ -401,7 +411,29 @@ private static List> analyzeCaseInProject .filter(p -> p.getRight().isPresent()) .map(p -> Pair.of(p.getLeft(), p.getRight().get())) .toList(); - return groupsByCase; + + // Cascade aggregations in such a way: + // RangeAggregation + // ...Any other range aggregations + // Metric Aggregation comes at last + // Note that but a composite aggregation can not be a sub aggregation of range aggregation, + // but range aggregation can be a sub aggregation of a composite aggregation. + AggregationBuilder rangeAggregationBuilder = null; + if (!groupsByCase.isEmpty()) { + for (int i = 0; i < groupsByCase.size(); i++) { + Pair p = groupsByCase.get(i); + if (i == 0) { + rangeAggregationBuilder = p.getRight(); + } else { + groupsByCase.get(i - 1).getRight().subAggregation(p.getRight()); + } + } + groupsByCase.getLast().getRight().subAggregations(metricBuilder); + } + + Set aggPushedAsRanges = + groupsByCase.stream().map(Pair::getLeft).collect(Collectors.toSet()); + return Pair.of(aggPushedAsRanges, rangeAggregationBuilder); } private static Pair> processAggregateCalls( From e701e5776b31a1f7d0eb57b298c710d898d68313 Mon Sep 17 00:00:00 2001 From: Yuanchun Shen Date: Sat, 11 Oct 2025 17:35:52 +0800 Subject: [PATCH 21/26] Chores: remove unused helper method Signed-off-by: Yuanchun Shen --- .../response/agg/BucketAggregationParser.java | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/response/agg/BucketAggregationParser.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/response/agg/BucketAggregationParser.java index fb2d42d0923..4abba2a3d2b 100644 --- 
a/opensearch/src/main/java/org/opensearch/sql/opensearch/response/agg/BucketAggregationParser.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/response/agg/BucketAggregationParser.java @@ -122,16 +122,4 @@ public List> parse(SearchHits hits) { protected Map extract(CompositeAggregation.Bucket bucket) { return bucket.getKey(); } - - /** - * Extracts key-value pairs from a range aggregation bucket without processing its - * sub-aggregations. - * - * @param bucket the range aggregation bucket to extract data from - * @param name the name to use as the key in the returned map - * @return a map containing the bucket's key mapped to the provided name - */ - protected Map extract(Range.Bucket bucket, String name) { - return Map.of(name, bucket.getKey()); - } } From a22ae79f865a88d6369760218099c369d834baed Mon Sep 17 00:00:00 2001 From: Yuanchun Shen Date: Tue, 14 Oct 2025 13:40:04 +0800 Subject: [PATCH 22/26] Unify logics for creating nested aggregations Signed-off-by: Yuanchun Shen --- .../sql/calcite/remote/CalciteExplainIT.java | 32 +- ..._composite_autodate_range_metric_push.yaml | 11 + .../calcite/explain_agg_counts_by4.yaml | 2 +- .../explain_stats_bins_on_time_and_term.yaml | 2 +- .../explain_stats_bins_on_time_and_term2.yaml | 2 +- .../opensearch/request/AggregateAnalyzer.java | 338 ++++++++---------- 6 files changed, 174 insertions(+), 213 deletions(-) create mode 100644 integ-test/src/test/resources/expectedOutput/calcite/agg_composite_autodate_range_metric_push.yaml diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java index 73f9e9ea7d5..4ec1ec57938 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java @@ -10,12 +10,14 @@ import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_LOGS; import 
static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_NESTED_SIMPLE; import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_STRINGS; +import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_TIME_DATA; import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_WEBLOGS; import static org.opensearch.sql.util.MatcherUtils.assertJsonEqualsIgnoreId; import static org.opensearch.sql.util.MatcherUtils.assertYamlEqualsJsonIgnoreId; import java.io.IOException; import java.util.Locale; +import org.junit.Assume; import org.junit.Ignore; import org.junit.Test; import org.opensearch.sql.ppl.ExplainIT; @@ -376,22 +378,6 @@ public void testExplainStatsWithSubAggregation() throws IOException { + " @timestamp, region")); } - @Test - public void bucketNullableNotSupportSubAggregation() throws IOException { - // TODO: Don't throw exception after addressing - // https://github.com/opensearch-project/sql/issues/4317 - // When bucketNullable is true, sub aggregation is not supported. Hence we cannot pushdown the - // aggregation in this query. Caused by issue - // https://github.com/opensearch-project/sql/issues/4317, - // bin aggregation on timestamp field won't work if not been push down. 
- enabledOnlyWhenPushdownIsEnabled(); - assertThrows( - Exception.class, - () -> - explainQueryToString( - "source=events | bin @timestamp bins=3 | stats count() by @timestamp, region")); - } - @Test public void testExplainBinWithSpan() throws IOException { String expected = loadExpectedPlan("explain_bin_span.yaml"); @@ -1087,4 +1073,18 @@ public void testCasePushdownAsRangeQueryExplain() throws IOException { + " as avg_balance by age_range, state", TEST_INDEX_BANK))); } + + @Test + public void testNestedAggregationsExplain() throws IOException { + // the query runs into error when pushdown is disabled due to bin's implementation + Assume.assumeFalse(isPushdownDisabled()); + assertYamlEqualsJsonIgnoreId( + loadExpectedPlan("agg_composite_autodate_range_metric_push.yaml"), + explainQueryToString( + String.format( + "source=%s | bin timestamp bins=3 | eval value_range = case(value < 7000, 'small'" + + " else 'great') | stats bucket_nullable=false avg(value), count() by" + + " timestamp, value_range, category", + TEST_INDEX_TIME_DATA))); + } } diff --git a/integ-test/src/test/resources/expectedOutput/calcite/agg_composite_autodate_range_metric_push.yaml b/integ-test/src/test/resources/expectedOutput/calcite/agg_composite_autodate_range_metric_push.yaml new file mode 100644 index 00000000000..dccce23e18b --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite/agg_composite_autodate_range_metric_push.yaml @@ -0,0 +1,11 @@ +calcite: + logical: | + LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(avg(value)=[$3], count()=[$4], timestamp=[$0], value_range=[$1], category=[$2]) + LogicalAggregate(group=[{0, 1, 2}], avg(value)=[AVG($3)], count()=[COUNT()]) + LogicalProject(timestamp=[$9], value_range=[$10], category=[$1], value=[$2]) + LogicalFilter(condition=[AND(IS NOT NULL($9), IS NOT NULL($1))]) + LogicalProject(@timestamp=[$0], category=[$1], value=[$2], _id=[$4], _index=[$5], _score=[$6], _maxscore=[$7], _sort=[$8], 
_routing=[$9], timestamp=[WIDTH_BUCKET($3, 3, -(MAX($3) OVER (), MIN($3) OVER ()), MAX($3) OVER ())], value_range=[CASE(<($2, 7000), 'small':VARCHAR, 'great':VARCHAR)]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_time_data]]) + physical: | + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_time_data]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 2, 3},avg(value)=AVG($1),count()=COUNT()), PROJECT->[avg(value), count(), timestamp, value_range, category], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"category":{"terms":{"field":"category","missing_bucket":false,"order":"asc"}}}]},"aggregations":{"timestamp":{"auto_date_histogram":{"field":"timestamp","buckets":3,"minimum_interval":null},"aggregations":{"value_range":{"range":{"field":"value","ranges":[{"key":"small","to":7000.0},{"key":"great","from":7000.0}],"keyed":true},"aggregations":{"avg(value)":{"avg":{"field":"value"}}}}}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_counts_by4.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_counts_by4.yaml index e56eb5ad662..77fc6c6eadf 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_counts_by4.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_agg_counts_by4.yaml @@ -6,4 +6,4 @@ calcite: LogicalProject(gender=[$4], account_number=[$0]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},count()=COUNT(),count(account_number)=COUNT($1)), PROJECT->[count(), count(account_number), 
gender], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"gender":{"terms":{"field":"gender.keyword","missing_bucket":true,"missing_order":"first","order":"asc"}}}]},"aggregations":{"count()":{"value_count":{"field":"_index"}},"count(account_number)":{"value_count":{"field":"account_number"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},count()=COUNT(),count(account_number)=COUNT($1)), PROJECT->[count(), count(account_number), gender], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"gender":{"terms":{"field":"gender.keyword","missing_bucket":true,"missing_order":"first","order":"asc"}}}]},"aggregations":{"count(account_number)":{"value_count":{"field":"account_number"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_stats_bins_on_time_and_term.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_stats_bins_on_time_and_term.yaml index 8d3e77e622e..14cf8e6db82 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_stats_bins_on_time_and_term.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_stats_bins_on_time_and_term.yaml @@ -8,4 +8,4 @@ calcite: LogicalProject(environment=[$0], status_code=[$2], service=[$3], host=[$4], memory_usage=[$5], response_time=[$6], cpu_usage=[$7], region=[$8], bytes_sent=[$9], _id=[$10], _index=[$11], _score=[$12], _maxscore=[$13], _sort=[$14], _routing=[$15], @timestamp=[WIDTH_BUCKET($1, 3, -(MAX($1) OVER (), MIN($1) OVER ()), MAX($1) OVER ())]) 
CalciteLogicalIndexScan(table=[[OpenSearch, events]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, events]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1},count()=COUNT()), PROJECT->[count(), @timestamp, region], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"region":{"terms":{"field":"region","size":1000,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":{"_key":"asc"}},"aggregations":{"@timestamp":{"auto_date_histogram":{"field":"@timestamp","buckets":3,"minimum_interval":null}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, events]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1},count()=COUNT()), PROJECT->[count(), @timestamp, region], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"region":{"terms":{"field":"region","missing_bucket":false,"order":"asc"}}}]},"aggregations":{"@timestamp":{"auto_date_histogram":{"field":"@timestamp","buckets":3,"minimum_interval":null}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_stats_bins_on_time_and_term2.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_stats_bins_on_time_and_term2.yaml index ffc24ee8939..1dc48f5a550 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_stats_bins_on_time_and_term2.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_stats_bins_on_time_and_term2.yaml @@ -8,4 +8,4 @@ calcite: LogicalProject(environment=[$0], status_code=[$2], service=[$3], host=[$4], memory_usage=[$5], response_time=[$6], cpu_usage=[$7], region=[$8], bytes_sent=[$9], _id=[$10], _index=[$11], _score=[$12], 
_maxscore=[$13], _sort=[$14], _routing=[$15], @timestamp=[WIDTH_BUCKET($1, 3, -(MAX($1) OVER (), MIN($1) OVER ()), MAX($1) OVER ())]) CalciteLogicalIndexScan(table=[[OpenSearch, events]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, events]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={1, 2},avg(cpu_usage)=AVG($0)), PROJECT->[avg(cpu_usage), @timestamp, region], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"region":{"terms":{"field":"region","size":1000,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":{"_key":"asc"}},"aggregations":{"@timestamp":{"auto_date_histogram":{"field":"@timestamp","buckets":3,"minimum_interval":null},"aggregations":{"avg(cpu_usage)":{"avg":{"field":"cpu_usage"}}}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, events]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={1, 2},avg(cpu_usage)=AVG($0)), PROJECT->[avg(cpu_usage), @timestamp, region], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"region":{"terms":{"field":"region","missing_bucket":false,"order":"asc"}}}]},"aggregations":{"@timestamp":{"auto_date_histogram":{"field":"@timestamp","buckets":3,"minimum_interval":null},"aggregations":{"avg(cpu_usage)":{"avg":{"field":"cpu_usage"}}}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/request/AggregateAnalyzer.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/request/AggregateAnalyzer.java index 56e50a6151a..59c40522dbd 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/request/AggregateAnalyzer.java +++ 
b/opensearch/src/main/java/org/opensearch/sql/opensearch/request/AggregateAnalyzer.java @@ -37,11 +37,12 @@ import com.google.common.collect.ImmutableList; import java.util.ArrayList; import java.util.Collections; +import java.util.HashSet; import java.util.List; import java.util.Map; +import java.util.Optional; import java.util.Set; import java.util.function.Function; -import java.util.stream.Collectors; import lombok.RequiredArgsConstructor; import org.apache.calcite.plan.RelOptCluster; import org.apache.calcite.rel.core.Aggregate; @@ -91,7 +92,6 @@ import org.opensearch.sql.opensearch.response.agg.SingleValueParser; import org.opensearch.sql.opensearch.response.agg.StatsParser; import org.opensearch.sql.opensearch.response.agg.TopHitsParser; -import org.opensearch.sql.opensearch.storage.script.aggregation.dsl.BucketAggregationBuilder; import org.opensearch.sql.opensearch.storage.script.aggregation.dsl.CompositeAggregationBuilder; /** @@ -129,12 +129,6 @@ public static class ExpressionNotAnalyzableException extends Exception { } } - public static class CompositeAggUnSupportedException extends RuntimeException { - CompositeAggUnSupportedException(String message) { - super(message); - } - } - private AggregateAnalyzer() {} @RequiredArgsConstructor @@ -219,98 +213,72 @@ public static Pair, OpenSearchAggregationResponseParser Builder newMetricBuilder = countAggNameAndBuilderPair.getRight(); List countAggNames = countAggNameAndBuilderPair.getLeft(); - Pair, AggregationBuilder> caseAggPushedAndRangeBuilder = - pushCaseAsRanges(groupList, project, rowType, newMetricBuilder); - // Remove groups that are converted to ranges from groupList - Set aggPushedAsRanges = caseAggPushedAndRangeBuilder.getLeft(); - AggregationBuilder rangeAggregationBuilder = caseAggPushedAndRangeBuilder.getRight(); - // The group-by list after removing CASE that can be converted to range queries - groupList = groupList.stream().filter(i -> !aggPushedAsRanges.contains(i)).toList(); - - // The 
top-level query is a range query: - // - stats avg() by range_field - // - stats count() by range_field - // - stats avg(), count() by range_field - // RangeAgg - // Metric - if (!aggPushedAsRanges.isEmpty() && groupList.isEmpty()) { - return Pair.of( - List.of(rangeAggregationBuilder), - new BucketAggregationParser(metricParsers, countAggNames)); + // Used to track the current sub-builder as analysis progresses + Builder subBuilder = newMetricBuilder; + + Pair, AggregationBuilder> aggPushedAndAggBuilder = + createNestedAggregation(groupList, project, subBuilder, helper); + Set aggPushed = aggPushedAndAggBuilder.getLeft(); + AggregationBuilder pushedAggBuilder = aggPushedAndAggBuilder.getRight(); + // The group-by list after removing composite-incompatible aggregations + groupList = groupList.stream().filter(i -> !aggPushed.contains(i)).toList(); + if (pushedAggBuilder != null) { + subBuilder = new Builder().addAggregator(pushedAggBuilder); } - // No parent composite aggregation or range aggregation is attached: + + // No group-by clause -- no parent aggregations are attached: // - stats count() - // - stats avg() + // - stats avg(), count() // Metric - else if (aggregate.getGroupSet().isEmpty() && groupList.isEmpty()) { - if (newMetricBuilder == null) { + if (aggregate.getGroupSet().isEmpty()) { + if (subBuilder == null) { // The optimization must require all count aggregations are removed, // and they have only one field name return Pair.of(List.of(), new CountAsTotalHitsParser(countAggNames)); } else { return Pair.of( - ImmutableList.copyOf(newMetricBuilder.getAggregatorFactories()), + ImmutableList.copyOf(subBuilder.getAggregatorFactories()), new NoBucketAggregationParser(metricParsers)); } } - // AutoDateHistogram as top-level aggregation: bin timestamp bins=3 | stats avg(balance) by - // timestamp - // AutoDateHistogram + // No composite aggregation at top-level + // - stats avg() by range_field + // - stats count() by auto_date_span + // - stats count() by 
...auto_date_spans, ...range_fields + // [AutoDateHistogram | RangeAgg]+ // Metric - else if (aggregate.getGroupSet().length() == 1 - && isAutoDateSpan( - project.getProjects().get(aggregate.getGroupSet().asList().getFirst()))) { - ValuesSourceAggregationBuilder bucketBuilder = createBucket(0, project, helper); - if (newMetricBuilder != null) { - bucketBuilder.subAggregations(newMetricBuilder); - } + else if (groupList.isEmpty()) { return Pair.of( - Collections.singletonList(bucketBuilder), + ImmutableList.copyOf(subBuilder.getAggregatorFactories()), new BucketAggregationParser(metricParsers, countAggNames)); } - // It has both composite aggregation and range aggregation: - // - stats avg() by range_field, non_range_field + // Composite aggregation at top level -- it has composite aggregation, with or without its + // incompatible value sources as sub-aggregations + // - stats avg() by term_fields + // - stats avg() by date_histogram + // - stats count() by auto_date_span, range_field, term_fields // CompositeAgg - // RangeAgg - // Metric - else if (!aggPushedAsRanges.isEmpty()) { + // [...RangeAgg] + // [...AutoDateHistogramAgg] + // Metric + else { List> buckets = createCompositeBuckets(groupList, project, helper); + if (buckets.size() != groupList.size()) { + throw new UnsupportedOperationException( + "Not all the left aggregations can be converted to value sources of composite" + + " aggregation"); + } + AggregationBuilder compositeBuilder = + AggregationBuilders.composite("composite_buckets", buckets) + .size(AGGREGATION_BUCKET_SIZE); + if (subBuilder != null) { + compositeBuilder.subAggregations(subBuilder); + } return Pair.of( - Collections.singletonList( - AggregationBuilders.composite("composite_buckets", buckets) - .subAggregation(rangeAggregationBuilder) - .size(AGGREGATION_BUCKET_SIZE)), + Collections.singletonList(compositeBuilder), new BucketAggregationParser(metricParsers, countAggNames)); } - // It does not have range aggregation, but has composite 
aggregation: - // - stats avg() by non_range_field - // CompositeAgg - // Metric - else { - AggregationBuilder aggregationBuilder; - try { - List> buckets = - createCompositeBuckets(groupList, project, helper); - aggregationBuilder = - AggregationBuilders.composite("composite_buckets", buckets) - .size(AGGREGATION_BUCKET_SIZE); - if (newMetricBuilder != null) { - aggregationBuilder.subAggregations(metricBuilder); - } - return Pair.of( - Collections.singletonList(aggregationBuilder), - new BucketAggregationParser(metricParsers, countAggNames)); - } catch (CompositeAggUnSupportedException e) { - if (bucketNullable) { - throw new UnsupportedOperationException(e.getMessage()); - } - aggregationBuilder = createNestedBuckets(groupList, project, newMetricBuilder, helper); - return Pair.of( - Collections.singletonList(aggregationBuilder), - new BucketAggregationParser(metricParsers, countAggNames)); - } - } } catch (Throwable e) { Throwables.throwIfInstanceOf(e, UnsupportedOperationException.class); throw new ExpressionNotAnalyzableException("Can't convert " + aggregate, e); @@ -362,80 +330,6 @@ private static boolean supportCountFiled( == 1; } - /** - * Analyzes and converts CASE expressions in GROUP BY clauses to OpenSearch range aggregations. - * - *

This method identifies group by fields that are derived from CASE functions and transforms - * them into range aggregation builders. The resulting aggregations are cascaded in a hierarchical - * structure where range aggregations contain other range aggregations as sub-aggregations, with - * metric aggregations placed at the deepest level. - * - *

The aggregation hierarchy follows this pattern: - * - *

-   * RangeAggregation
-   *   └── RangeAggregation (nested)
-   *       └── ... (more range aggregations)
-   *           └── Metric Aggregation (at the bottom)
-   * 
- * - * @param groupList the list of group by field indices from the query - * @param project the projection containing the expressions to analyze, may be null - * @param rowType the data type information for the current row structure - * @param metricBuilder the metric aggregation builder to be placed at the bottom of the hierarchy - * @return a pair containing: - *
    - *
  • A set of integers representing the indices of group fields that were successfully - * converted to range aggregations - *
  • The root range aggregation builder, or null if no CASE expressions were found or - * converted - *
- */ - private static Pair, AggregationBuilder> pushCaseAsRanges( - List groupList, Project project, RelDataType rowType, Builder metricBuilder) { - // Find group by fields derived from CASE functions and convert them to range queries - List> groupsByCase = - groupList.stream() - .filter(i -> project != null && i < project.getProjects().size()) - .map(i -> Pair.of(i, project.getNamedProjects().get(i))) - .filter( - p -> - p.getRight().getKey() instanceof RexCall rexCall - && rexCall.getKind() == SqlKind.CASE) - .map( - p -> - Pair.of( - p.getLeft(), - CaseRangeAnalyzer.create(p.getRight().getValue(), rowType) - .analyze((RexCall) p.getRight().getKey()))) - .filter(p -> p.getRight().isPresent()) - .map(p -> Pair.of(p.getLeft(), p.getRight().get())) - .toList(); - - // Cascade aggregations in such a way: - // RangeAggregation - // ...Any other range aggregations - // Metric Aggregation comes at last - // Note that but a composite aggregation can not be a sub aggregation of range aggregation, - // but range aggregation can be a sub aggregation of a composite aggregation. 
- AggregationBuilder rangeAggregationBuilder = null; - if (!groupsByCase.isEmpty()) { - for (int i = 0; i < groupsByCase.size(); i++) { - Pair p = groupsByCase.get(i); - if (i == 0) { - rangeAggregationBuilder = p.getRight(); - } else { - groupsByCase.get(i - 1).getRight().subAggregation(p.getRight()); - } - } - groupsByCase.getLast().getRight().subAggregations(metricBuilder); - } - - Set aggPushedAsRanges = - groupsByCase.stream().map(Pair::getLeft).collect(Collectors.toSet()); - return Pair.of(aggPushedAsRanges, rangeAggregationBuilder); - } - private static Pair> processAggregateCalls( List aggFieldNames, List aggCalls, @@ -651,22 +545,106 @@ private static List> createCompositeBuckets( return resultBuilder.build(); } - private static ValuesSourceAggregationBuilder createNestedBuckets( + /** + * Creates nested bucket aggregations for expressions that are not qualified as value sources for + * composite aggregations. + * + *

This method processes a list of group by expressions and identifies those that cannot be + * used as value sources in composite aggregations but can be pushed down as sub-aggregations, + * such as auto date histograms and range buckets. + * + *

The aggregation hierarchy follows this pattern: + * + *

+   * AutoDateHistogram | RangeAggregation
+   *   └── AutoDateHistogram | RangeAggregation (nested)
+   *       └── ... (more composite-incompatible aggregations)
+   *           └── Metric Aggregation (at the bottom)
+   * 
+ * + * @param groupList the list of group by field indices from the query + * @param project the projection containing the expressions to analyze + * @param metricBuilder the metric aggregation builder to be placed at the bottom of the hierarchy + * @param helper the aggregation builder helper containing row type and utility methods + * @return a pair containing: + *
    + *
  • A set of integers representing the indices of group fields that were successfully + * pushed as sub-aggregations + *
  • The root aggregation builder, or null if no such expressions were found + *
+ */ + private static Pair, AggregationBuilder> createNestedAggregation( List groupList, Project project, Builder metricBuilder, AggregateAnalyzer.AggregateBuilderHelper helper) { - ValuesSourceAggregationBuilder rootAgg = createBucket(groupList.get(0), project, helper); - ValuesSourceAggregationBuilder currentAgg = rootAgg; - for (int i = 1; i < groupList.size(); i++) { - ValuesSourceAggregationBuilder nextAgg = createBucket(groupList.get(i), project, helper); - currentAgg.subAggregations(new AggregatorFactories.Builder().addAggregator(nextAgg)); - currentAgg = nextAgg; + AggregationBuilder rootAggBuilder = null; + AggregationBuilder tailAggBuilder = null; + + Set aggPushed = new HashSet<>(); + for (Integer i : groupList) { + RexNode agg = project.getProjects().get(i); + String name = project.getNamedProjects().get(i).getValue(); + AggregationBuilder aggBuilder = createCompositeIncompatibleAggregation(agg, name, helper); + if (aggBuilder != null) { + aggPushed.add(i); + if (rootAggBuilder == null) { + rootAggBuilder = aggBuilder; + } else { + tailAggBuilder.subAggregation(aggBuilder); + } + tailAggBuilder = aggBuilder; + } } - if (metricBuilder != null) { - currentAgg.subAggregations(metricBuilder); + if (tailAggBuilder != null && metricBuilder != null) { + tailAggBuilder.subAggregations(metricBuilder); } - return rootAgg; + return Pair.of(aggPushed, rootAggBuilder); + } + + /** + * Creates an aggregation builder for expressions that are not qualified as composite aggregation + * value sources. + * + *

This method analyzes RexNode expressions and creates appropriate OpenSearch aggregation + * builders for cases where they can not be value sources of a composite aggregation. + * + *

The method supports the following aggregation types: + * + *

+   * - Auto Date Histogram Aggregation: For temporal bucketing with automatic interval selection
+   * - Range Aggregation: For CASE expressions that define value ranges
+   * 
+ * + * @param agg the RexNode expression to analyze and convert + * @param name the name to assign to the created aggregation builder + * @param helper the aggregation builder helper containing row type and utility methods + * @return the appropriate ValuesSourceAggregationBuilder for the expression, or null if no + * compatible aggregation type is found + */ + private static ValuesSourceAggregationBuilder createCompositeIncompatibleAggregation( + RexNode agg, String name, AggregateBuilderHelper helper) { + ValuesSourceAggregationBuilder aggBuilder = null; + if (isAutoDateSpan(agg)) { + aggBuilder = analyzeAutoDateSpan(agg, name, helper); + } else if (isCase(agg)) { + Optional rangeAggBuilder = + CaseRangeAnalyzer.create(name, helper.rowType).analyze((RexCall) agg); + if (rangeAggBuilder.isPresent()) { + aggBuilder = rangeAggBuilder.get(); + } + } + return aggBuilder; + } + + private static AutoDateHistogramAggregationBuilder analyzeAutoDateSpan( + RexNode spanAgg, String name, AggregateAnalyzer.AggregateBuilderHelper helper) { + RexCall rexCall = (RexCall) spanAgg; + RexInputRef rexInputRef = (RexInputRef) rexCall.getOperands().getFirst(); + RexLiteral valueLiteral = (RexLiteral) rexCall.getOperands().get(1); + return new AutoDateHistogramAggregationBuilder(name) + .field(helper.inferNamedField(rexInputRef).getRootName()) + .setNumBuckets(requireNonNull(valueLiteral.getValueAs(Integer.class))); } private static boolean isAutoDateSpan(RexNode rex) { @@ -675,37 +653,12 @@ private static boolean isAutoDateSpan(RexNode rex) { && rexCall.getOperator().equals(WIDTH_BUCKET); } - private static ValuesSourceAggregationBuilder createBucket( - Integer groupIndex, Project project, AggregateBuilderHelper helper) { - RexNode rex = project.getProjects().get(groupIndex); - String bucketName = project.getRowType().getFieldList().get(groupIndex).getName(); - if (rex instanceof RexCall rexCall - && rexCall.getKind() == SqlKind.OTHER_FUNCTION - && 
rexCall.getOperator().getName().equalsIgnoreCase(BuiltinFunctionName.SPAN.name()) - && rexCall.getOperands().size() == 3 - && rexCall.getOperands().getFirst() instanceof RexInputRef rexInputRef - && rexCall.getOperands().get(1) instanceof RexLiteral valueLiteral - && rexCall.getOperands().get(2) instanceof RexLiteral unitLiteral) { - return BucketAggregationBuilder.buildHistogram( - bucketName, - helper.inferNamedField(rexInputRef).getRootName(), - valueLiteral.getValueAs(Double.class), - SpanUnit.of(unitLiteral.getValueAs(String.class))); - } else if (isAutoDateSpan(rex)) { - RexCall rexCall = (RexCall) rex; - RexInputRef rexInputRef = (RexInputRef) rexCall.getOperands().getFirst(); - RexLiteral valueLiteral = (RexLiteral) rexCall.getOperands().get(1); - return new AutoDateHistogramAggregationBuilder(bucketName) - .field(helper.inferNamedField(rexInputRef).getRootName()) - .setNumBuckets(requireNonNull(valueLiteral.getValueAs(Integer.class))); - } else { - return createTermsAggregationBuilder(bucketName, rex, helper); - } + private static boolean isCase(RexNode rex) { + return rex instanceof RexCall rexCall && rexCall.getKind() == SqlKind.CASE; } private static CompositeValuesSourceBuilder createCompositeBucket( - Integer groupIndex, Project project, AggregateAnalyzer.AggregateBuilderHelper helper) - throws CompositeAggUnSupportedException { + Integer groupIndex, Project project, AggregateAnalyzer.AggregateBuilderHelper helper) { RexNode rex = project.getProjects().get(groupIndex); String bucketName = project.getRowType().getFieldNames().get(groupIndex); if (rex instanceof RexCall rexCall @@ -722,9 +675,6 @@ private static CompositeValuesSourceBuilder createCompositeBucket( SpanUnit.of(unitLiteral.getValueAs(String.class)), MissingOrder.FIRST, helper.bucketNullable); - } else if (isAutoDateSpan(rex)) { - throw new CompositeAggUnSupportedException( - "auto_date_histogram is not supported in composite agg."); } else { return createTermsSourceBuilder(bucketName, rex, 
helper); } From 010fd0659508f5d7f4222db642cb925db5de00d4 Mon Sep 17 00:00:00 2001 From: Yuanchun Shen Date: Tue, 14 Oct 2025 14:06:03 +0800 Subject: [PATCH 23/26] Remove a note in condition.rst Signed-off-by: Yuanchun Shen --- docs/user/ppl/functions/condition.rst | 2 -- .../sql/opensearch/request/AggregateAnalyzer.java | 8 ++++---- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/docs/user/ppl/functions/condition.rst b/docs/user/ppl/functions/condition.rst index 0441c49a12d..c4d52f74913 100644 --- a/docs/user/ppl/functions/condition.rst +++ b/docs/user/ppl/functions/condition.rst @@ -235,8 +235,6 @@ When each condition is a field comparison with a numeric literal and each result - Null values will not be grouped into any bucket of a range aggregation and will be ignored - The default ELSE clause will use the string literal ``"null"`` instead of actual NULL values -To avoid these edge-case limitations, set ``plugins.calcite.pushdown.enabled`` to false. - Example:: os> source=accounts | eval result = case(age > 35, firstname, age < 30, lastname else employer) | fields result, firstname, lastname, age, employer diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/request/AggregateAnalyzer.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/request/AggregateAnalyzer.java index 59c40522dbd..87758c17669 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/request/AggregateAnalyzer.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/request/AggregateAnalyzer.java @@ -216,11 +216,12 @@ public static Pair, OpenSearchAggregationResponseParser // Used to track the current sub-builder as analysis progresses Builder subBuilder = newMetricBuilder; + // Push auto date span & case in group-by list into nested aggregations Pair, AggregationBuilder> aggPushedAndAggBuilder = createNestedAggregation(groupList, project, subBuilder, helper); Set aggPushed = aggPushedAndAggBuilder.getLeft(); AggregationBuilder 
pushedAggBuilder = aggPushedAndAggBuilder.getRight(); - // The group-by list after removing composite-incompatible aggregations + // The group-by list after removing pushed aggregations groupList = groupList.stream().filter(i -> !aggPushed.contains(i)).toList(); if (pushedAggBuilder != null) { subBuilder = new Builder().addAggregator(pushedAggBuilder); @@ -258,9 +259,8 @@ else if (groupList.isEmpty()) { // - stats avg() by date_histogram // - stats count() by auto_date_span, range_field, term_fields // CompositeAgg - // [...RangeAgg] - // [...AutoDateHistogramAgg] - // Metric + // [AutoDateHistogram | RangeAgg]* + // Metric else { List> buckets = createCompositeBuckets(groupList, project, helper); From 3b65b2dc9d32862dea26c78ecf1cb820b32c82d4 Mon Sep 17 00:00:00 2001 From: Yuanchun Shen Date: Wed, 15 Oct 2025 18:40:30 +0800 Subject: [PATCH 24/26] Optmize range aggregation Signed-off-by: Yuanchun Shen --- .../remote/CalcitePPLCaseFunctionIT.java | 45 ++++++++ .../calcite/agg_case_cannot_push.yaml | 2 +- .../agg_case_composite_cannot_push.yaml | 2 +- .../calcite/agg_case_num_res_cannot_push.yaml | 2 +- .../calcite/explain_stats_bins_on_time.yaml | 3 +- .../calcite/explain_stats_bins_on_time2.yaml | 3 +- .../opensearch/request/AggregateAnalyzer.java | 103 +++++++++--------- .../response/agg/BucketAggregationParser.java | 58 ++++++---- .../storage/scan/CalciteLogicalIndexScan.java | 23 ---- 9 files changed, 138 insertions(+), 103 deletions(-) diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLCaseFunctionIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLCaseFunctionIT.java index 719ed9c39c8..d0b253500c5 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLCaseFunctionIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLCaseFunctionIT.java @@ -6,12 +6,14 @@ package org.opensearch.sql.calcite.remote; import static 
org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_BANK; +import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_OTEL_LOGS; import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_STATE_COUNTRY_WITH_NULL; import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_WEBLOGS; import static org.opensearch.sql.util.MatcherUtils.closeTo; import static org.opensearch.sql.util.MatcherUtils.rows; import static org.opensearch.sql.util.MatcherUtils.schema; import static org.opensearch.sql.util.MatcherUtils.verifyDataRows; +import static org.opensearch.sql.util.MatcherUtils.verifyNumOfRows; import static org.opensearch.sql.util.MatcherUtils.verifySchema; import java.io.IOException; @@ -31,6 +33,7 @@ public void init() throws Exception { loadIndex(Index.TIME_TEST_DATA); loadIndex(Index.STATE_COUNTRY_WITH_NULL); loadIndex(Index.BANK); + loadIndex(Index.OTELLOGS); appendDataForBadResponse(); } @@ -470,4 +473,46 @@ public void testCaseAggWithNullValues() throws IOException { verifyDataRows(actual, rows(10, "teenager"), rows(25, "adult"), rows(70, "senior")); } } + + @Test + public void testNestedCaseAggWithAutoDateHistogram() throws IOException { + JSONObject actual1 = + executeQuery( + String.format( + "source=%s | bin @timestamp bins=2 | eval severity_range = case(severityNumber <" + + " 16, 'minor' else 'severe') | stats avg(severityNumber), count() by" + + " @timestamp, severity_range, flags", + TEST_INDEX_OTEL_LOGS)); + verifySchema( + actual1, + schema("avg(severityNumber)", "double"), + schema("count()", "bigint"), + schema("@timestamp", "timestamp"), + schema("severity_range", "string"), + schema("flags", "bigint")); + + verifyDataRows( + actual1, + rows(8.85, 20, "2024-01-15 10:30:02", "minor", 0), + rows(20, 9, "2024-01-15 10:30:02", "severe", 0), + rows(9, 1, "2024-01-15 10:30:00", "minor", 1), + rows(17, 1, "2024-01-15 10:30:00", "severe", 1), + rows(1, 1, "2024-01-15 10:30:05", "minor", 1)); + + JSONObject actual2 = + executeQuery( + 
String.format( + "source=%s | bin @timestamp bins=100 | eval severity_range = case(severityNumber <" + + " 16, 'minor' else 'severe') | stats avg(severityNumber), count() by" + + " @timestamp, severity_range, flags", + TEST_INDEX_OTEL_LOGS)); + verifySchema( + actual2, + schema("avg(severityNumber)", "double"), + schema("count()", "bigint"), + schema("@timestamp", "timestamp"), + schema("severity_range", "string"), + schema("flags", "bigint")); + verifyNumOfRows(actual2, 32); + } } diff --git a/integ-test/src/test/resources/expectedOutput/calcite/agg_case_cannot_push.yaml b/integ-test/src/test/resources/expectedOutput/calcite/agg_case_cannot_push.yaml index 02f92021453..0198a44e025 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/agg_case_cannot_push.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/agg_case_cannot_push.yaml @@ -6,4 +6,4 @@ calcite: LogicalProject(age_range=[CASE(<($10, 30), 'u30':VARCHAR, SEARCH($10, Sarg[[30..40]]), 'u40':VARCHAR, 'u100':VARCHAR)], age=[$10]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},avg_age=AVG($1)), PROJECT->[avg_age, age_range], LIMIT->10000], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"age_range":{"terms":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXNyABFqYXZhLnV0aWwuQ29sbFNlcleOq7Y6G6gRAwABSQADdGFneHAAAAADdwQAAAAGdAAHcm93VHlwZXQHrXsKICAiZmllbGRzIjogWwogICAgewogICAgICAidHlwZSI6ICJCSUdJTlQiLAogICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAibmFtZSI6ICJhY2NvdW50X251bWJlciIKICAgIH0sCiAgICB7CiAgICAgICJ0eXBlIjogIlZBUkNIQVIiLAogICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAicHJlY2lzaW9uIjogLTEsCiAgICAgICJuYW1lIjogImZpcnN0bmFtZSIKICAgIH0sCiAgICB7CiAgICAgICJ0eXBlIjogIlZBUkNIQVIiLAogICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAicHJlY2lzaW9uIjogLTEsCiAgICAgICJuYW1lIjogImFkZHJlc3MiCiAgICB9LAogICAgewogICAgICAidWR0IjogIkVYUFJfVElNRVNUQU1QIiwKICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICJwcmVjaXNpb24iOiAtMSwKICAgICAgIm5hbWUiOiAiYmlydGhkYXRlIgogICAgfSwKICAgIHsKICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICJwcmVjaXNpb24iOiAtMSwKICAgICAgIm5hbWUiOiAiZ2VuZGVyIgogICAgfSwKICAgIHsKICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICJwcmVjaXNpb24iOiAtMSwKICAgICAgIm5hbWUiOiAiY2l0eSIKICAgIH0sCiAgICB7CiAgICAgICJ0eXBlIjogIlZBUkNIQVIiLAogICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAicHJlY2lzaW9uIjogLTEsCiAgICAgICJuYW1lIjogImxhc3RuYW1lIgogICAgfSwKICAgIHsKICAgICAgInR5cGUiOiAiQklHSU5UIiwKICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgIm5hbWUiOiAiYmFsYW5jZSIKICAgIH0sCiAgICB7CiAgICAgICJ0eXBlIjogIlZBUkNIQVIiLAogICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAicHJlY2lzaW9uIjogLTEsCiAgICAgICJuYW1lIjogImVtcGxveWVyIgogICAgfSwKICAgIHsKICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICJwcmVjaXNpb24iOiAtMSwKICAgICAgIm5hbWUiOiAic3RhdGUiCiAgICB9LAogICAgewogICAgICAidHlwZSI6ICJJTlRFR0VSIiwKICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgIm5hbWUiOiAiYWdlIgogICAgfSwKICAgIHsKICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICJwcmVjaXNpb24iOiAtMSwKICAgICAg
Im5hbWUiOiAiZW1haWwiCiAgICB9LAogICAgewogICAgICAidHlwZSI6ICJCT09MRUFOIiwKICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgIm5hbWUiOiAibWFsZSIKICAgIH0sCiAgICB7CiAgICAgICJ0eXBlIjogIlZBUkNIQVIiLAogICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAicHJlY2lzaW9uIjogLTEsCiAgICAgICJuYW1lIjogIl9pZCIKICAgIH0sCiAgICB7CiAgICAgICJ0eXBlIjogIlZBUkNIQVIiLAogICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAicHJlY2lzaW9uIjogLTEsCiAgICAgICJuYW1lIjogIl9pbmRleCIKICAgIH0sCiAgICB7CiAgICAgICJ0eXBlIjogIlJFQUwiLAogICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAibmFtZSI6ICJfc2NvcmUiCiAgICB9LAogICAgewogICAgICAidHlwZSI6ICJSRUFMIiwKICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgIm5hbWUiOiAiX21heHNjb3JlIgogICAgfSwKICAgIHsKICAgICAgInR5cGUiOiAiQklHSU5UIiwKICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgIm5hbWUiOiAiX3NvcnQiCiAgICB9LAogICAgewogICAgICAidHlwZSI6ICJWQVJDSEFSIiwKICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgInByZWNpc2lvbiI6IC0xLAogICAgICAibmFtZSI6ICJfcm91dGluZyIKICAgIH0KICBdLAogICJudWxsYWJsZSI6IHRydWUKfXQABGV4cHJ0BXp7CiAgIm9wIjogewogICAgIm5hbWUiOiAiQ0FTRSIsCiAgICAia2luZCI6ICJDQVNFIiwKICAgICJzeW50YXgiOiAiU1BFQ0lBTCIKICB9LAogICJvcGVyYW5kcyI6IFsKICAgIHsKICAgICAgIm9wIjogewogICAgICAgICJuYW1lIjogIjwiLAogICAgICAgICJraW5kIjogIkxFU1NfVEhBTiIsCiAgICAgICAgInN5bnRheCI6ICJCSU5BUlkiCiAgICAgIH0sCiAgICAgICJvcGVyYW5kcyI6IFsKICAgICAgICB7CiAgICAgICAgICAiaW5wdXQiOiAxMCwKICAgICAgICAgICJuYW1lIjogIiQxMCIKICAgICAgICB9LAogICAgICAgIHsKICAgICAgICAgICJsaXRlcmFsIjogMzAsCiAgICAgICAgICAidHlwZSI6IHsKICAgICAgICAgICAgInR5cGUiOiAiSU5URUdFUiIsCiAgICAgICAgICAgICJudWxsYWJsZSI6IGZhbHNlCiAgICAgICAgICB9CiAgICAgICAgfQogICAgICBdCiAgICB9LAogICAgewogICAgICAibGl0ZXJhbCI6ICJ1MzAiLAogICAgICAidHlwZSI6IHsKICAgICAgICAidHlwZSI6ICJWQVJDSEFSIiwKICAgICAgICAibnVsbGFibGUiOiBmYWxzZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfSwKICAgIHsKICAgICAgIm9wIjogewogICAgICAgICJuYW1lIjogIlNFQVJDSCIsCiAgICAgICAgImtpbmQiOiAiU0VBUkNIIiwKICAgICAgICAic3ludGF4IjogIklOVEVSTkFMIgogICAgICB9LAogICAgICAib3BlcmFuZHMiOiBbCiAgICAgICAgewogICAgICAgICAgImlucHV0IjogMTAsCiAgICAgICAgICAibmFtZSI6ICIkMTAiCiAgICAgICAgfSwKICAgICAg
ICB7CiAgICAgICAgICAibGl0ZXJhbCI6IHsKICAgICAgICAgICAgInJhbmdlU2V0IjogWwogICAgICAgICAgICAgIFsKICAgICAgICAgICAgICAgICJjbG9zZWQiLAogICAgICAgICAgICAgICAgIjMwIiwKICAgICAgICAgICAgICAgICI0MCIKICAgICAgICAgICAgICBdCiAgICAgICAgICAgIF0sCiAgICAgICAgICAgICJudWxsQXMiOiAiVU5LTk9XTiIKICAgICAgICAgIH0sCiAgICAgICAgICAidHlwZSI6IHsKICAgICAgICAgICAgInR5cGUiOiAiSU5URUdFUiIsCiAgICAgICAgICAgICJudWxsYWJsZSI6IGZhbHNlCiAgICAgICAgICB9CiAgICAgICAgfQogICAgICBdCiAgICB9LAogICAgewogICAgICAibGl0ZXJhbCI6ICJ1NDAiLAogICAgICAidHlwZSI6IHsKICAgICAgICAidHlwZSI6ICJWQVJDSEFSIiwKICAgICAgICAibnVsbGFibGUiOiBmYWxzZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfSwKICAgIHsKICAgICAgImxpdGVyYWwiOiAidTEwMCIsCiAgICAgICJ0eXBlIjogewogICAgICAgICJ0eXBlIjogIlZBUkNIQVIiLAogICAgICAgICJudWxsYWJsZSI6IGZhbHNlLAogICAgICAgICJwcmVjaXNpb24iOiAtMQogICAgICB9CiAgICB9CiAgXQp9dAAKZmllbGRUeXBlc3NyABdqYXZhLnV0aWwuTGlua2VkSGFzaE1hcDTATlwQbMD7AgABWgALYWNjZXNzT3JkZXJ4cgARamF2YS51dGlsLkhhc2hNYXAFB9rBwxZg0QMAAkYACmxvYWRGYWN0b3JJAAl0aHJlc2hvbGR4cD9AAAAAAAAYdwgAAAAgAAAADXQADmFjY291bnRfbnVtYmVyfnIAKW9yZy5vcGVuc2VhcmNoLnNxbC5kYXRhLnR5cGUuRXhwckNvcmVUeXBlAAAAAAAAAAASAAB4cgAOamF2YS5sYW5nLkVudW0AAAAAAAAAABIAAHhwdAAETE9OR3QACWZpcnN0bmFtZX5xAH4AC3QABlNUUklOR3QAB2FkZHJlc3NzcgA6b3JnLm9wZW5zZWFyY2guc3FsLm9wZW5zZWFyY2guZGF0YS50eXBlLk9wZW5TZWFyY2hUZXh0VHlwZa2Do5ME4zFEAgABTAAGZmllbGRzdAAPTGphdmEvdXRpbC9NYXA7eHIAOm9yZy5vcGVuc2VhcmNoLnNxbC5vcGVuc2VhcmNoLmRhdGEudHlwZS5PcGVuU2VhcmNoRGF0YVR5cGXCY7zKAvoFNQIAA0wADGV4cHJDb3JlVHlwZXQAK0xvcmcvb3BlbnNlYXJjaC9zcWwvZGF0YS90eXBlL0V4cHJDb3JlVHlwZTtMAAttYXBwaW5nVHlwZXQASExvcmcvb3BlbnNlYXJjaC9zcWwvb3BlbnNlYXJjaC9kYXRhL3R5cGUvT3BlblNlYXJjaERhdGFUeXBlJE1hcHBpbmdUeXBlO0wACnByb3BlcnRpZXNxAH4AFHhwfnEAfgALdAAHVU5LTk9XTn5yAEZvcmcub3BlbnNlYXJjaC5zcWwub3BlbnNlYXJjaC5kYXRhLnR5cGUuT3BlblNlYXJjaERhdGFUeXBlJE1hcHBpbmdUeXBlAAAAAAAAAAASAAB4cQB+AAx0AARUZXh0c3IAPHNoYWRlZC5jb20uZ29vZ2xlLmNvbW1vbi5jb2xsZWN0LkltbXV0YWJsZU1hcCRTZXJpYWxpemVkRm9ybQAAAAAAAAAAAgACTAAEa2V5c3QAEkxqYXZhL2xhbmcvT2JqZWN0O0wABnZhbHVlc3EAfgAfeHB1cgATW0xqYXZhLmxhbmcuT2Jq
ZWN0O5DOWJ8QcylsAgAAeHAAAAAAdXEAfgAhAAAAAHNxAH4AAAAAAAN3BAAAAAB4dAAJYmlydGhkYXRlc3IAOm9yZy5vcGVuc2VhcmNoLnNxbC5vcGVuc2VhcmNoLmRhdGEudHlwZS5PcGVuU2VhcmNoRGF0ZVR5cGWeLVKuEH3KrwIAAUwAB2Zvcm1hdHN0ABBMamF2YS91dGlsL0xpc3Q7eHEAfgAVfnEAfgALdAAJVElNRVNUQU1QfnEAfgAbdAAERGF0ZXEAfgAgc3EAfgAAAAAAAXcEAAAAAHh0AAZnZW5kZXJzcQB+ABNxAH4AGXEAfgAccQB+ACBzcQB+AAAAAAADdwQAAAACdAAHa2V5d29yZHNxAH4AFXEAfgAQfnEAfgAbdAAHS2V5d29yZHEAfgAgeHQABGNpdHlxAH4AEHQACGxhc3RuYW1lcQB+ABB0AAdiYWxhbmNlcQB+AA10AAhlbXBsb3llcnNxAH4AE3EAfgAZcQB+ABxxAH4AIHEAfgAkdAAFc3RhdGVzcQB+ABNxAH4AGXEAfgAccQB+ACBzcQB+AAAAAAADdwQAAAACcQB+ADFxAH4AMnh0AANhZ2V+cQB+AAt0AAdJTlRFR0VSdAAFZW1haWxzcQB+ABNxAH4AGXEAfgAccQB+ACBxAH4AJHQABG1hbGV+cQB+AAt0AAdCT09MRUFOeAB4\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0}},"missing_bucket":true,"missing_order":"first","order":"asc"}}}]},"aggregations":{"avg_age":{"avg":{"field":"age"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},avg_age=AVG($1)), PROJECT->[avg_age, age_range], LIMIT->10000], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"age_range":{"terms":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXNyABFqYXZhLnV0aWwuQ29sbFNlcleOq7Y6G6gRAwABSQADdGFneHAAAAADdwQAAAAGdAAHcm93VHlwZXQHrXsKICAiZmllbGRzIjogWwogICAgewogICAgICAidHlwZSI6ICJCSUdJTlQiLAogICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAibmFtZSI6ICJhY2NvdW50X251bWJlciIKICAgIH0sCiAgICB7CiAgICAgICJ0eXBlIjogIlZBUkNIQVIiLAogICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAicHJlY2lzaW9uIjogLTEsCiAgICAgICJuYW1lIjogImZpcnN0bmFtZSIKICAgIH0sCiAgICB7CiAgICAgICJ0eXBlIjogIlZBUkNIQVIiLAogICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAicHJlY2lzaW9uIjogLTEsCiAgICAgICJuYW1lIjogImFkZHJlc3MiCiAgICB9LAogICAgewogICAgICAidWR0IjogIkVYUFJfVElNRVNUQU1QIiwKICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICJwcmVjaXNpb24iOiAtMSwKICAgICAgIm5hbWUiOiAiYmlydGhkYXRlIgogICAgfSwKICAgIHsKICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICJwcmVjaXNpb24iOiAtMSwKICAgICAgIm5hbWUiOiAiZ2VuZGVyIgogICAgfSwKICAgIHsKICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICJwcmVjaXNpb24iOiAtMSwKICAgICAgIm5hbWUiOiAiY2l0eSIKICAgIH0sCiAgICB7CiAgICAgICJ0eXBlIjogIlZBUkNIQVIiLAogICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAicHJlY2lzaW9uIjogLTEsCiAgICAgICJuYW1lIjogImxhc3RuYW1lIgogICAgfSwKICAgIHsKICAgICAgInR5cGUiOiAiQklHSU5UIiwKICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgIm5hbWUiOiAiYmFsYW5jZSIKICAgIH0sCiAgICB7CiAgICAgICJ0eXBlIjogIlZBUkNIQVIiLAogICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAicHJlY2lzaW9uIjogLTEsCiAgICAgICJuYW1lIjogImVtcGxveWVyIgogICAgfSwKICAgIHsKICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICJwcmVjaXNpb24iOiAtMSwKICAgICAgIm5hbWUiOiAic3RhdGUiCiAgICB9LAogICAgewogICAgICAidHlwZSI6ICJJTlRFR0VSIiwKICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgIm5hbWUiOiAiYWdlIgogICAgfSwKICAgIHsKICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICJwcmVjaXNpb24iOiAtMSwKICAgICAg
Im5hbWUiOiAiZW1haWwiCiAgICB9LAogICAgewogICAgICAidHlwZSI6ICJCT09MRUFOIiwKICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgIm5hbWUiOiAibWFsZSIKICAgIH0sCiAgICB7CiAgICAgICJ0eXBlIjogIlZBUkNIQVIiLAogICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAicHJlY2lzaW9uIjogLTEsCiAgICAgICJuYW1lIjogIl9pZCIKICAgIH0sCiAgICB7CiAgICAgICJ0eXBlIjogIlZBUkNIQVIiLAogICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAicHJlY2lzaW9uIjogLTEsCiAgICAgICJuYW1lIjogIl9pbmRleCIKICAgIH0sCiAgICB7CiAgICAgICJ0eXBlIjogIlJFQUwiLAogICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAibmFtZSI6ICJfc2NvcmUiCiAgICB9LAogICAgewogICAgICAidHlwZSI6ICJSRUFMIiwKICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgIm5hbWUiOiAiX21heHNjb3JlIgogICAgfSwKICAgIHsKICAgICAgInR5cGUiOiAiQklHSU5UIiwKICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgIm5hbWUiOiAiX3NvcnQiCiAgICB9LAogICAgewogICAgICAidHlwZSI6ICJWQVJDSEFSIiwKICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgInByZWNpc2lvbiI6IC0xLAogICAgICAibmFtZSI6ICJfcm91dGluZyIKICAgIH0KICBdLAogICJudWxsYWJsZSI6IHRydWUKfXQABGV4cHJ0BXp7CiAgIm9wIjogewogICAgIm5hbWUiOiAiQ0FTRSIsCiAgICAia2luZCI6ICJDQVNFIiwKICAgICJzeW50YXgiOiAiU1BFQ0lBTCIKICB9LAogICJvcGVyYW5kcyI6IFsKICAgIHsKICAgICAgIm9wIjogewogICAgICAgICJuYW1lIjogIjwiLAogICAgICAgICJraW5kIjogIkxFU1NfVEhBTiIsCiAgICAgICAgInN5bnRheCI6ICJCSU5BUlkiCiAgICAgIH0sCiAgICAgICJvcGVyYW5kcyI6IFsKICAgICAgICB7CiAgICAgICAgICAiaW5wdXQiOiAxMCwKICAgICAgICAgICJuYW1lIjogIiQxMCIKICAgICAgICB9LAogICAgICAgIHsKICAgICAgICAgICJsaXRlcmFsIjogMzAsCiAgICAgICAgICAidHlwZSI6IHsKICAgICAgICAgICAgInR5cGUiOiAiSU5URUdFUiIsCiAgICAgICAgICAgICJudWxsYWJsZSI6IGZhbHNlCiAgICAgICAgICB9CiAgICAgICAgfQogICAgICBdCiAgICB9LAogICAgewogICAgICAibGl0ZXJhbCI6ICJ1MzAiLAogICAgICAidHlwZSI6IHsKICAgICAgICAidHlwZSI6ICJWQVJDSEFSIiwKICAgICAgICAibnVsbGFibGUiOiBmYWxzZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfSwKICAgIHsKICAgICAgIm9wIjogewogICAgICAgICJuYW1lIjogIlNFQVJDSCIsCiAgICAgICAgImtpbmQiOiAiU0VBUkNIIiwKICAgICAgICAic3ludGF4IjogIklOVEVSTkFMIgogICAgICB9LAogICAgICAib3BlcmFuZHMiOiBbCiAgICAgICAgewogICAgICAgICAgImlucHV0IjogMTAsCiAgICAgICAgICAibmFtZSI6ICIkMTAiCiAgICAgICAgfSwKICAgICAg
ICB7CiAgICAgICAgICAibGl0ZXJhbCI6IHsKICAgICAgICAgICAgInJhbmdlU2V0IjogWwogICAgICAgICAgICAgIFsKICAgICAgICAgICAgICAgICJjbG9zZWQiLAogICAgICAgICAgICAgICAgIjMwIiwKICAgICAgICAgICAgICAgICI0MCIKICAgICAgICAgICAgICBdCiAgICAgICAgICAgIF0sCiAgICAgICAgICAgICJudWxsQXMiOiAiVU5LTk9XTiIKICAgICAgICAgIH0sCiAgICAgICAgICAidHlwZSI6IHsKICAgICAgICAgICAgInR5cGUiOiAiSU5URUdFUiIsCiAgICAgICAgICAgICJudWxsYWJsZSI6IGZhbHNlCiAgICAgICAgICB9CiAgICAgICAgfQogICAgICBdCiAgICB9LAogICAgewogICAgICAibGl0ZXJhbCI6ICJ1NDAiLAogICAgICAidHlwZSI6IHsKICAgICAgICAidHlwZSI6ICJWQVJDSEFSIiwKICAgICAgICAibnVsbGFibGUiOiBmYWxzZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfSwKICAgIHsKICAgICAgImxpdGVyYWwiOiAidTEwMCIsCiAgICAgICJ0eXBlIjogewogICAgICAgICJ0eXBlIjogIlZBUkNIQVIiLAogICAgICAgICJudWxsYWJsZSI6IGZhbHNlLAogICAgICAgICJwcmVjaXNpb24iOiAtMQogICAgICB9CiAgICB9CiAgXQp9dAAKZmllbGRUeXBlc3NyABFqYXZhLnV0aWwuSGFzaE1hcAUH2sHDFmDRAwACRgAKbG9hZEZhY3RvckkACXRocmVzaG9sZHhwP0AAAAAAABh3CAAAACAAAAATdAAIX3JvdXRpbmd+cgApb3JnLm9wZW5zZWFyY2guc3FsLmRhdGEudHlwZS5FeHByQ29yZVR5cGUAAAAAAAAAABIAAHhyAA5qYXZhLmxhbmcuRW51bQAAAAAAAAAAEgAAeHB0AAZTVFJJTkd0AA5hY2NvdW50X251bWJlcn5xAH4ACnQABExPTkd0AAlmaXJzdG5hbWVxAH4ADHQAB2FkZHJlc3NzcgA6b3JnLm9wZW5zZWFyY2guc3FsLm9wZW5zZWFyY2guZGF0YS50eXBlLk9wZW5TZWFyY2hUZXh0VHlwZa2Do5ME4zFEAgABTAAGZmllbGRzdAAPTGphdmEvdXRpbC9NYXA7eHIAOm9yZy5vcGVuc2VhcmNoLnNxbC5vcGVuc2VhcmNoLmRhdGEudHlwZS5PcGVuU2VhcmNoRGF0YVR5cGXCY7zKAvoFNQIAA0wADGV4cHJDb3JlVHlwZXQAK0xvcmcvb3BlbnNlYXJjaC9zcWwvZGF0YS90eXBlL0V4cHJDb3JlVHlwZTtMAAttYXBwaW5nVHlwZXQASExvcmcvb3BlbnNlYXJjaC9zcWwvb3BlbnNlYXJjaC9kYXRhL3R5cGUvT3BlblNlYXJjaERhdGFUeXBlJE1hcHBpbmdUeXBlO0wACnByb3BlcnRpZXNxAH4AFHhwfnEAfgAKdAAHVU5LTk9XTn5yAEZvcmcub3BlbnNlYXJjaC5zcWwub3BlbnNlYXJjaC5kYXRhLnR5cGUuT3BlblNlYXJjaERhdGFUeXBlJE1hcHBpbmdUeXBlAAAAAAAAAAASAAB4cQB+AAt0AARUZXh0c3IAPHNoYWRlZC5jb20uZ29vZ2xlLmNvbW1vbi5jb2xsZWN0LkltbXV0YWJsZU1hcCRTZXJpYWxpemVkRm9ybQAAAAAAAAAAAgACTAAEa2V5c3QAEkxqYXZhL2xhbmcvT2JqZWN0O0wABnZhbHVlc3EAfgAfeHB1cgATW0xqYXZhLmxhbmcuT2JqZWN0O5DOWJ8QcylsAgAAeHAAAAAAdXEAfgAhAAAAAHNxAH4A
AAAAAAN3BAAAAAB4dAAJYmlydGhkYXRlc3IAOm9yZy5vcGVuc2VhcmNoLnNxbC5vcGVuc2VhcmNoLmRhdGEudHlwZS5PcGVuU2VhcmNoRGF0ZVR5cGWeLVKuEH3KrwIAAUwAB2Zvcm1hdHN0ABBMamF2YS91dGlsL0xpc3Q7eHEAfgAVfnEAfgAKdAAJVElNRVNUQU1QfnEAfgAbdAAERGF0ZXEAfgAgc3EAfgAAAAAAAXcEAAAAAHh0AAZnZW5kZXJzcQB+ABNxAH4AGXEAfgAccQB+ACBzcQB+AAAAAAADdwQAAAACdAAHa2V5d29yZHNxAH4AFXEAfgAMfnEAfgAbdAAHS2V5d29yZHEAfgAgeHQABl9pbmRleHEAfgAMdAAEY2l0eXEAfgAMdAAJX21heHNjb3JlfnEAfgAKdAAFRkxPQVR0AAZfc2NvcmVxAH4AOHQABV9zb3J0cQB+AA90AAhsYXN0bmFtZXEAfgAMdAAHYmFsYW5jZXEAfgAPdAAIZW1wbG95ZXJzcQB+ABNxAH4AGXEAfgAccQB+ACBxAH4AJHQABXN0YXRlc3EAfgATcQB+ABlxAH4AHHEAfgAgc3EAfgAAAAAAA3cEAAAAAnEAfgAxcQB+ADJ4dAADX2lkcQB+AAx0AANhZ2V+cQB+AAp0AAdJTlRFR0VSdAAFZW1haWxzcQB+ABNxAH4AGXEAfgAccQB+ACBxAH4AJHQABG1hbGV+cQB+AAp0AAdCT09MRUFOeHg=\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0}},"missing_bucket":true,"missing_order":"first","order":"asc"}}}]},"aggregations":{"avg_age":{"avg":{"field":"age"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/agg_case_composite_cannot_push.yaml b/integ-test/src/test/resources/expectedOutput/calcite/agg_case_composite_cannot_push.yaml index 7b64abe160c..833c6a16a12 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/agg_case_composite_cannot_push.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/agg_case_composite_cannot_push.yaml @@ -6,4 +6,4 @@ calcite: LogicalProject(age_range=[CASE(<($10, 35), 'u35':VARCHAR, $11)], state=[$9], balance=[$7]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1},avg_balance=AVG($2)), PROJECT->[avg_balance, age_range, state], LIMIT->10000], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"age_range":{"terms":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXNyABFqYXZhLnV0aWwuQ29sbFNlcleOq7Y6G6gRAwABSQADdGFneHAAAAADdwQAAAAGdAAHcm93VHlwZXQHrXsKICAiZmllbGRzIjogWwogICAgewogICAgICAidHlwZSI6ICJCSUdJTlQiLAogICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAibmFtZSI6ICJhY2NvdW50X251bWJlciIKICAgIH0sCiAgICB7CiAgICAgICJ0eXBlIjogIlZBUkNIQVIiLAogICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAicHJlY2lzaW9uIjogLTEsCiAgICAgICJuYW1lIjogImZpcnN0bmFtZSIKICAgIH0sCiAgICB7CiAgICAgICJ0eXBlIjogIlZBUkNIQVIiLAogICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAicHJlY2lzaW9uIjogLTEsCiAgICAgICJuYW1lIjogImFkZHJlc3MiCiAgICB9LAogICAgewogICAgICAidWR0IjogIkVYUFJfVElNRVNUQU1QIiwKICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICJwcmVjaXNpb24iOiAtMSwKICAgICAgIm5hbWUiOiAiYmlydGhkYXRlIgogICAgfSwKICAgIHsKICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICJwcmVjaXNpb24iOiAtMSwKICAgICAgIm5hbWUiOiAiZ2VuZGVyIgogICAgfSwKICAgIHsKICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICJwcmVjaXNpb24iOiAtMSwKICAgICAgIm5hbWUiOiAiY2l0eSIKICAgIH0sCiAgICB7CiAgICAgICJ0eXBlIjogIlZBUkNIQVIiLAogICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAicHJlY2lzaW9uIjogLTEsCiAgICAgICJuYW1lIjogImxhc3RuYW1lIgogICAgfSwKICAgIHsKICAgICAgInR5cGUiOiAiQklHSU5UIiwKICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgIm5hbWUiOiAiYmFsYW5jZSIKICAgIH0sCiAgICB7CiAgICAgICJ0eXBlIjogIlZBUkNIQVIiLAogICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAicHJlY2lzaW9uIjogLTEsCiAgICAgICJuYW1lIjogImVtcGxveWVyIgogICAgfSwKICAgIHsKICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICJwcmVjaXNpb24iOiAtMSwKICAgICAgIm5hbWUiOiAic3RhdGUiCiAgICB9LAogICAgewogICAgICAidHlwZSI6ICJJTlRFR0VSIiwKICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgIm5hbWUiOiAiYWdlIgogICAgfSwKICAgIHsKICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICJwcmVjaXNpb24iOiAtMSwKICAgICAg
Im5hbWUiOiAiZW1haWwiCiAgICB9LAogICAgewogICAgICAidHlwZSI6ICJCT09MRUFOIiwKICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgIm5hbWUiOiAibWFsZSIKICAgIH0sCiAgICB7CiAgICAgICJ0eXBlIjogIlZBUkNIQVIiLAogICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAicHJlY2lzaW9uIjogLTEsCiAgICAgICJuYW1lIjogIl9pZCIKICAgIH0sCiAgICB7CiAgICAgICJ0eXBlIjogIlZBUkNIQVIiLAogICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAicHJlY2lzaW9uIjogLTEsCiAgICAgICJuYW1lIjogIl9pbmRleCIKICAgIH0sCiAgICB7CiAgICAgICJ0eXBlIjogIlJFQUwiLAogICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAibmFtZSI6ICJfc2NvcmUiCiAgICB9LAogICAgewogICAgICAidHlwZSI6ICJSRUFMIiwKICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgIm5hbWUiOiAiX21heHNjb3JlIgogICAgfSwKICAgIHsKICAgICAgInR5cGUiOiAiQklHSU5UIiwKICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgIm5hbWUiOiAiX3NvcnQiCiAgICB9LAogICAgewogICAgICAidHlwZSI6ICJWQVJDSEFSIiwKICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgInByZWNpc2lvbiI6IC0xLAogICAgICAibmFtZSI6ICJfcm91dGluZyIKICAgIH0KICBdLAogICJudWxsYWJsZSI6IHRydWUKfXQABGV4cHJ0An97CiAgIm9wIjogewogICAgIm5hbWUiOiAiQ0FTRSIsCiAgICAia2luZCI6ICJDQVNFIiwKICAgICJzeW50YXgiOiAiU1BFQ0lBTCIKICB9LAogICJvcGVyYW5kcyI6IFsKICAgIHsKICAgICAgIm9wIjogewogICAgICAgICJuYW1lIjogIjwiLAogICAgICAgICJraW5kIjogIkxFU1NfVEhBTiIsCiAgICAgICAgInN5bnRheCI6ICJCSU5BUlkiCiAgICAgIH0sCiAgICAgICJvcGVyYW5kcyI6IFsKICAgICAgICB7CiAgICAgICAgICAiaW5wdXQiOiAxMCwKICAgICAgICAgICJuYW1lIjogIiQxMCIKICAgICAgICB9LAogICAgICAgIHsKICAgICAgICAgICJsaXRlcmFsIjogMzUsCiAgICAgICAgICAidHlwZSI6IHsKICAgICAgICAgICAgInR5cGUiOiAiSU5URUdFUiIsCiAgICAgICAgICAgICJudWxsYWJsZSI6IGZhbHNlCiAgICAgICAgICB9CiAgICAgICAgfQogICAgICBdCiAgICB9LAogICAgewogICAgICAibGl0ZXJhbCI6ICJ1MzUiLAogICAgICAidHlwZSI6IHsKICAgICAgICAidHlwZSI6ICJWQVJDSEFSIiwKICAgICAgICAibnVsbGFibGUiOiBmYWxzZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfSwKICAgIHsKICAgICAgImlucHV0IjogMTEsCiAgICAgICJuYW1lIjogIiQxMSIKICAgIH0KICBdCn10AApmaWVsZFR5cGVzc3IAF2phdmEudXRpbC5MaW5rZWRIYXNoTWFwNMBOXBBswPsCAAFaAAthY2Nlc3NPcmRlcnhyABFqYXZhLnV0aWwuSGFzaE1hcAUH2sHDFmDRAwACRgAKbG9hZEZhY3RvckkACXRocmVzaG9sZHhwP0AAAAAAABh3CAAAACAAAAANdAAOYWNj
b3VudF9udW1iZXJ+cgApb3JnLm9wZW5zZWFyY2guc3FsLmRhdGEudHlwZS5FeHByQ29yZVR5cGUAAAAAAAAAABIAAHhyAA5qYXZhLmxhbmcuRW51bQAAAAAAAAAAEgAAeHB0AARMT05HdAAJZmlyc3RuYW1lfnEAfgALdAAGU1RSSU5HdAAHYWRkcmVzc3NyADpvcmcub3BlbnNlYXJjaC5zcWwub3BlbnNlYXJjaC5kYXRhLnR5cGUuT3BlblNlYXJjaFRleHRUeXBlrYOjkwTjMUQCAAFMAAZmaWVsZHN0AA9MamF2YS91dGlsL01hcDt4cgA6b3JnLm9wZW5zZWFyY2guc3FsLm9wZW5zZWFyY2guZGF0YS50eXBlLk9wZW5TZWFyY2hEYXRhVHlwZcJjvMoC+gU1AgADTAAMZXhwckNvcmVUeXBldAArTG9yZy9vcGVuc2VhcmNoL3NxbC9kYXRhL3R5cGUvRXhwckNvcmVUeXBlO0wAC21hcHBpbmdUeXBldABITG9yZy9vcGVuc2VhcmNoL3NxbC9vcGVuc2VhcmNoL2RhdGEvdHlwZS9PcGVuU2VhcmNoRGF0YVR5cGUkTWFwcGluZ1R5cGU7TAAKcHJvcGVydGllc3EAfgAUeHB+cQB+AAt0AAdVTktOT1dOfnIARm9yZy5vcGVuc2VhcmNoLnNxbC5vcGVuc2VhcmNoLmRhdGEudHlwZS5PcGVuU2VhcmNoRGF0YVR5cGUkTWFwcGluZ1R5cGUAAAAAAAAAABIAAHhxAH4ADHQABFRleHRzcgA8c2hhZGVkLmNvbS5nb29nbGUuY29tbW9uLmNvbGxlY3QuSW1tdXRhYmxlTWFwJFNlcmlhbGl6ZWRGb3JtAAAAAAAAAAACAAJMAARrZXlzdAASTGphdmEvbGFuZy9PYmplY3Q7TAAGdmFsdWVzcQB+AB94cHVyABNbTGphdmEubGFuZy5PYmplY3Q7kM5YnxBzKWwCAAB4cAAAAAB1cQB+ACEAAAAAc3EAfgAAAAAAA3cEAAAAAHh0AAliaXJ0aGRhdGVzcgA6b3JnLm9wZW5zZWFyY2guc3FsLm9wZW5zZWFyY2guZGF0YS50eXBlLk9wZW5TZWFyY2hEYXRlVHlwZZ4tUq4QfcqvAgABTAAHZm9ybWF0c3QAEExqYXZhL3V0aWwvTGlzdDt4cQB+ABV+cQB+AAt0AAlUSU1FU1RBTVB+cQB+ABt0AAREYXRlcQB+ACBzcQB+AAAAAAABdwQAAAAAeHQABmdlbmRlcnNxAH4AE3EAfgAZcQB+ABxxAH4AIHNxAH4AAAAAAAN3BAAAAAJ0AAdrZXl3b3Jkc3EAfgAVcQB+ABB+cQB+ABt0AAdLZXl3b3JkcQB+ACB4dAAEY2l0eXEAfgAQdAAIbGFzdG5hbWVxAH4AEHQAB2JhbGFuY2VxAH4ADXQACGVtcGxveWVyc3EAfgATcQB+ABlxAH4AHHEAfgAgcQB+ACR0AAVzdGF0ZXNxAH4AE3EAfgAZcQB+ABxxAH4AIHNxAH4AAAAAAAN3BAAAAAJxAH4AMXEAfgAyeHQAA2FnZX5xAH4AC3QAB0lOVEVHRVJ0AAVlbWFpbHNxAH4AE3EAfgAZcQB+ABxxAH4AIHEAfgAkdAAEbWFsZX5xAH4AC3QAB0JPT0xFQU54AHg=\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0}},"missing_bucket":true,"missing_order":"first","order":"asc"}}},{"state":{"terms":{"field":"state.keyword","missing_bucket":true,"missing_order":"first","order":"asc"}}}]},"aggregations":{"avg_balance":{"avg":{"field":"balance"}}}}}}, 
requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1},avg_balance=AVG($2)), PROJECT->[avg_balance, age_range, state], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"age_range":{"terms":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXNyABFqYXZhLnV0aWwuQ29sbFNlcleOq7Y6G6gRAwABSQADdGFneHAAAAADdwQAAAAGdAAHcm93VHlwZXQHrXsKICAiZmllbGRzIjogWwogICAgewogICAgICAidHlwZSI6ICJCSUdJTlQiLAogICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAibmFtZSI6ICJhY2NvdW50X251bWJlciIKICAgIH0sCiAgICB7CiAgICAgICJ0eXBlIjogIlZBUkNIQVIiLAogICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAicHJlY2lzaW9uIjogLTEsCiAgICAgICJuYW1lIjogImZpcnN0bmFtZSIKICAgIH0sCiAgICB7CiAgICAgICJ0eXBlIjogIlZBUkNIQVIiLAogICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAicHJlY2lzaW9uIjogLTEsCiAgICAgICJuYW1lIjogImFkZHJlc3MiCiAgICB9LAogICAgewogICAgICAidWR0IjogIkVYUFJfVElNRVNUQU1QIiwKICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICJwcmVjaXNpb24iOiAtMSwKICAgICAgIm5hbWUiOiAiYmlydGhkYXRlIgogICAgfSwKICAgIHsKICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICJwcmVjaXNpb24iOiAtMSwKICAgICAgIm5hbWUiOiAiZ2VuZGVyIgogICAgfSwKICAgIHsKICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICJwcmVjaXNpb24iOiAtMSwKICAgICAgIm5hbWUiOiAiY2l0eSIKICAgIH0sCiAgICB7CiAgICAgICJ0eXBlIjogIlZBUkNIQVIiLAogICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAicHJlY2lzaW9uIjogLTEsCiAgICAgICJuYW1lIjogImxhc3RuYW1lIgogICAgfSwKICAgIHsKICAgICAgInR5cGUiOiAiQklHSU5UIiwKICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgIm5hbWUiOiAiYmFsYW5jZSIKICAgIH0sCiAgICB7CiAgICAgICJ0eXBlIjogIlZBUkNIQVIiLAogICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAicHJlY2lzaW9uIjogLTEsCiAgICAgICJuYW1lIjogImVtcGxveWVyIgogICAgfSwKICAgIHsKICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgIC
JudWxsYWJsZSI6IHRydWUsCiAgICAgICJwcmVjaXNpb24iOiAtMSwKICAgICAgIm5hbWUiOiAic3RhdGUiCiAgICB9LAogICAgewogICAgICAidHlwZSI6ICJJTlRFR0VSIiwKICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgIm5hbWUiOiAiYWdlIgogICAgfSwKICAgIHsKICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICJwcmVjaXNpb24iOiAtMSwKICAgICAgIm5hbWUiOiAiZW1haWwiCiAgICB9LAogICAgewogICAgICAidHlwZSI6ICJCT09MRUFOIiwKICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgIm5hbWUiOiAibWFsZSIKICAgIH0sCiAgICB7CiAgICAgICJ0eXBlIjogIlZBUkNIQVIiLAogICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAicHJlY2lzaW9uIjogLTEsCiAgICAgICJuYW1lIjogIl9pZCIKICAgIH0sCiAgICB7CiAgICAgICJ0eXBlIjogIlZBUkNIQVIiLAogICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAicHJlY2lzaW9uIjogLTEsCiAgICAgICJuYW1lIjogIl9pbmRleCIKICAgIH0sCiAgICB7CiAgICAgICJ0eXBlIjogIlJFQUwiLAogICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAibmFtZSI6ICJfc2NvcmUiCiAgICB9LAogICAgewogICAgICAidHlwZSI6ICJSRUFMIiwKICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgIm5hbWUiOiAiX21heHNjb3JlIgogICAgfSwKICAgIHsKICAgICAgInR5cGUiOiAiQklHSU5UIiwKICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgIm5hbWUiOiAiX3NvcnQiCiAgICB9LAogICAgewogICAgICAidHlwZSI6ICJWQVJDSEFSIiwKICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgInByZWNpc2lvbiI6IC0xLAogICAgICAibmFtZSI6ICJfcm91dGluZyIKICAgIH0KICBdLAogICJudWxsYWJsZSI6IHRydWUKfXQABGV4cHJ0An97CiAgIm9wIjogewogICAgIm5hbWUiOiAiQ0FTRSIsCiAgICAia2luZCI6ICJDQVNFIiwKICAgICJzeW50YXgiOiAiU1BFQ0lBTCIKICB9LAogICJvcGVyYW5kcyI6IFsKICAgIHsKICAgICAgIm9wIjogewogICAgICAgICJuYW1lIjogIjwiLAogICAgICAgICJraW5kIjogIkxFU1NfVEhBTiIsCiAgICAgICAgInN5bnRheCI6ICJCSU5BUlkiCiAgICAgIH0sCiAgICAgICJvcGVyYW5kcyI6IFsKICAgICAgICB7CiAgICAgICAgICAiaW5wdXQiOiAxMCwKICAgICAgICAgICJuYW1lIjogIiQxMCIKICAgICAgICB9LAogICAgICAgIHsKICAgICAgICAgICJsaXRlcmFsIjogMzUsCiAgICAgICAgICAidHlwZSI6IHsKICAgICAgICAgICAgInR5cGUiOiAiSU5URUdFUiIsCiAgICAgICAgICAgICJudWxsYWJsZSI6IGZhbHNlCiAgICAgICAgICB9CiAgICAgICAgfQogICAgICBdCiAgICB9LAogICAgewogICAgICAibGl0ZXJhbCI6ICJ1MzUiLAogICAgICAidHlwZSI6IHsKICAgICAgICAidHlwZSI6ICJWQVJDSEFSIiwKICAgICAgICAibnVsbGFibGUiOiBmYWxzZSwKICAgICAgIC
AicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfSwKICAgIHsKICAgICAgImlucHV0IjogMTEsCiAgICAgICJuYW1lIjogIiQxMSIKICAgIH0KICBdCn10AApmaWVsZFR5cGVzc3IAEWphdmEudXRpbC5IYXNoTWFwBQfawcMWYNEDAAJGAApsb2FkRmFjdG9ySQAJdGhyZXNob2xkeHA/QAAAAAAAGHcIAAAAIAAAABN0AAhfcm91dGluZ35yAClvcmcub3BlbnNlYXJjaC5zcWwuZGF0YS50eXBlLkV4cHJDb3JlVHlwZQAAAAAAAAAAEgAAeHIADmphdmEubGFuZy5FbnVtAAAAAAAAAAASAAB4cHQABlNUUklOR3QADmFjY291bnRfbnVtYmVyfnEAfgAKdAAETE9OR3QACWZpcnN0bmFtZXEAfgAMdAAHYWRkcmVzc3NyADpvcmcub3BlbnNlYXJjaC5zcWwub3BlbnNlYXJjaC5kYXRhLnR5cGUuT3BlblNlYXJjaFRleHRUeXBlrYOjkwTjMUQCAAFMAAZmaWVsZHN0AA9MamF2YS91dGlsL01hcDt4cgA6b3JnLm9wZW5zZWFyY2guc3FsLm9wZW5zZWFyY2guZGF0YS50eXBlLk9wZW5TZWFyY2hEYXRhVHlwZcJjvMoC+gU1AgADTAAMZXhwckNvcmVUeXBldAArTG9yZy9vcGVuc2VhcmNoL3NxbC9kYXRhL3R5cGUvRXhwckNvcmVUeXBlO0wAC21hcHBpbmdUeXBldABITG9yZy9vcGVuc2VhcmNoL3NxbC9vcGVuc2VhcmNoL2RhdGEvdHlwZS9PcGVuU2VhcmNoRGF0YVR5cGUkTWFwcGluZ1R5cGU7TAAKcHJvcGVydGllc3EAfgAUeHB+cQB+AAp0AAdVTktOT1dOfnIARm9yZy5vcGVuc2VhcmNoLnNxbC5vcGVuc2VhcmNoLmRhdGEudHlwZS5PcGVuU2VhcmNoRGF0YVR5cGUkTWFwcGluZ1R5cGUAAAAAAAAAABIAAHhxAH4AC3QABFRleHRzcgA8c2hhZGVkLmNvbS5nb29nbGUuY29tbW9uLmNvbGxlY3QuSW1tdXRhYmxlTWFwJFNlcmlhbGl6ZWRGb3JtAAAAAAAAAAACAAJMAARrZXlzdAASTGphdmEvbGFuZy9PYmplY3Q7TAAGdmFsdWVzcQB+AB94cHVyABNbTGphdmEubGFuZy5PYmplY3Q7kM5YnxBzKWwCAAB4cAAAAAB1cQB+ACEAAAAAc3EAfgAAAAAAA3cEAAAAAHh0AAliaXJ0aGRhdGVzcgA6b3JnLm9wZW5zZWFyY2guc3FsLm9wZW5zZWFyY2guZGF0YS50eXBlLk9wZW5TZWFyY2hEYXRlVHlwZZ4tUq4QfcqvAgABTAAHZm9ybWF0c3QAEExqYXZhL3V0aWwvTGlzdDt4cQB+ABV+cQB+AAp0AAlUSU1FU1RBTVB+cQB+ABt0AAREYXRlcQB+ACBzcQB+AAAAAAABdwQAAAAAeHQABmdlbmRlcnNxAH4AE3EAfgAZcQB+ABxxAH4AIHNxAH4AAAAAAAN3BAAAAAJ0AAdrZXl3b3Jkc3EAfgAVcQB+AAx+cQB+ABt0AAdLZXl3b3JkcQB+ACB4dAAGX2luZGV4cQB+AAx0AARjaXR5cQB+AAx0AAlfbWF4c2NvcmV+cQB+AAp0AAVGTE9BVHQABl9zY29yZXEAfgA4dAAFX3NvcnRxAH4AD3QACGxhc3RuYW1lcQB+AAx0AAdiYWxhbmNlcQB+AA90AAhlbXBsb3llcnNxAH4AE3EAfgAZcQB+ABxxAH4AIHEAfgAkdAAFc3RhdGVzcQB+ABNxAH4AGXEAfgAccQB+ACBzcQB+AAAAAAADdwQAAAACcQB+ADFxAH4AMnh0AANfaWRxAH4ADHQAA2FnZX5xAH4ACnQAB0lOVEVHRVJ0AAVlbWFpbHNxAH4AE3EAfg
AZcQB+ABxxAH4AIHEAfgAkdAAEbWFsZX5xAH4ACnQAB0JPT0xFQU54eA==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0}},"missing_bucket":true,"missing_order":"first","order":"asc"}}},{"state":{"terms":{"field":"state.keyword","missing_bucket":true,"missing_order":"first","order":"asc"}}}]},"aggregations":{"avg_balance":{"avg":{"field":"balance"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/agg_case_num_res_cannot_push.yaml b/integ-test/src/test/resources/expectedOutput/calcite/agg_case_num_res_cannot_push.yaml index 544bacdcc9c..dd13c6cfe2e 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/agg_case_num_res_cannot_push.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/agg_case_num_res_cannot_push.yaml @@ -6,4 +6,4 @@ calcite: LogicalProject(age_range=[CASE(<($10, 30), 30, 100)]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},count()=COUNT()), PROJECT->[count(), age_range], LIMIT->10000], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"age_range":{"terms":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXNyABFqYXZhLnV0aWwuQ29sbFNlcleOq7Y6G6gRAwABSQADdGFneHAAAAADdwQAAAAGdAAHcm93VHlwZXQHrXsKICAiZmllbGRzIjogWwogICAgewogICAgICAidHlwZSI6ICJCSUdJTlQiLAogICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAibmFtZSI6ICJhY2NvdW50X251bWJlciIKICAgIH0sCiAgICB7CiAgICAgICJ0eXBlIjogIlZBUkNIQVIiLAogICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAicHJlY2lzaW9uIjogLTEsCiAgICAgICJuYW1lIjogImZpcnN0bmFtZSIKICAgIH0sCiAgICB7CiAgICAgICJ0eXBlIjogIlZBUkNIQVIiLAogICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAicHJlY2lzaW9uIjogLTEsCiAgICAgICJuYW1lIjogImFkZHJlc3MiCiAgICB9LAogICAgewogICAgICAidWR0IjogIkVYUFJfVElNRVNUQU1QIiwKICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICJwcmVjaXNpb24iOiAtMSwKICAgICAgIm5hbWUiOiAiYmlydGhkYXRlIgogICAgfSwKICAgIHsKICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICJwcmVjaXNpb24iOiAtMSwKICAgICAgIm5hbWUiOiAiZ2VuZGVyIgogICAgfSwKICAgIHsKICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICJwcmVjaXNpb24iOiAtMSwKICAgICAgIm5hbWUiOiAiY2l0eSIKICAgIH0sCiAgICB7CiAgICAgICJ0eXBlIjogIlZBUkNIQVIiLAogICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAicHJlY2lzaW9uIjogLTEsCiAgICAgICJuYW1lIjogImxhc3RuYW1lIgogICAgfSwKICAgIHsKICAgICAgInR5cGUiOiAiQklHSU5UIiwKICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgIm5hbWUiOiAiYmFsYW5jZSIKICAgIH0sCiAgICB7CiAgICAgICJ0eXBlIjogIlZBUkNIQVIiLAogICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAicHJlY2lzaW9uIjogLTEsCiAgICAgICJuYW1lIjogImVtcGxveWVyIgogICAgfSwKICAgIHsKICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICJwcmVjaXNpb24iOiAtMSwKICAgICAgIm5hbWUiOiAic3RhdGUiCiAgICB9LAogICAgewogICAgICAidHlwZSI6ICJJTlRFR0VSIiwKICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgIm5hbWUiOiAiYWdlIgogICAgfSwKICAgIHsKICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICJwcmVjaXNpb24iOiAtMSwKICAgICAg
Im5hbWUiOiAiZW1haWwiCiAgICB9LAogICAgewogICAgICAidHlwZSI6ICJCT09MRUFOIiwKICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgIm5hbWUiOiAibWFsZSIKICAgIH0sCiAgICB7CiAgICAgICJ0eXBlIjogIlZBUkNIQVIiLAogICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAicHJlY2lzaW9uIjogLTEsCiAgICAgICJuYW1lIjogIl9pZCIKICAgIH0sCiAgICB7CiAgICAgICJ0eXBlIjogIlZBUkNIQVIiLAogICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAicHJlY2lzaW9uIjogLTEsCiAgICAgICJuYW1lIjogIl9pbmRleCIKICAgIH0sCiAgICB7CiAgICAgICJ0eXBlIjogIlJFQUwiLAogICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAibmFtZSI6ICJfc2NvcmUiCiAgICB9LAogICAgewogICAgICAidHlwZSI6ICJSRUFMIiwKICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgIm5hbWUiOiAiX21heHNjb3JlIgogICAgfSwKICAgIHsKICAgICAgInR5cGUiOiAiQklHSU5UIiwKICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgIm5hbWUiOiAiX3NvcnQiCiAgICB9LAogICAgewogICAgICAidHlwZSI6ICJWQVJDSEFSIiwKICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgInByZWNpc2lvbiI6IC0xLAogICAgICAibmFtZSI6ICJfcm91dGluZyIKICAgIH0KICBdLAogICJudWxsYWJsZSI6IHRydWUKfXQABGV4cHJ0Ap97CiAgIm9wIjogewogICAgIm5hbWUiOiAiQ0FTRSIsCiAgICAia2luZCI6ICJDQVNFIiwKICAgICJzeW50YXgiOiAiU1BFQ0lBTCIKICB9LAogICJvcGVyYW5kcyI6IFsKICAgIHsKICAgICAgIm9wIjogewogICAgICAgICJuYW1lIjogIjwiLAogICAgICAgICJraW5kIjogIkxFU1NfVEhBTiIsCiAgICAgICAgInN5bnRheCI6ICJCSU5BUlkiCiAgICAgIH0sCiAgICAgICJvcGVyYW5kcyI6IFsKICAgICAgICB7CiAgICAgICAgICAiaW5wdXQiOiAxMCwKICAgICAgICAgICJuYW1lIjogIiQxMCIKICAgICAgICB9LAogICAgICAgIHsKICAgICAgICAgICJsaXRlcmFsIjogMzAsCiAgICAgICAgICAidHlwZSI6IHsKICAgICAgICAgICAgInR5cGUiOiAiSU5URUdFUiIsCiAgICAgICAgICAgICJudWxsYWJsZSI6IGZhbHNlCiAgICAgICAgICB9CiAgICAgICAgfQogICAgICBdCiAgICB9LAogICAgewogICAgICAibGl0ZXJhbCI6IDMwLAogICAgICAidHlwZSI6IHsKICAgICAgICAidHlwZSI6ICJJTlRFR0VSIiwKICAgICAgICAibnVsbGFibGUiOiBmYWxzZQogICAgICB9CiAgICB9LAogICAgewogICAgICAibGl0ZXJhbCI6IDEwMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiSU5URUdFUiIsCiAgICAgICAgIm51bGxhYmxlIjogZmFsc2UKICAgICAgfQogICAgfQogIF0KfXQACmZpZWxkVHlwZXNzcgAXamF2YS51dGlsLkxpbmtlZEhhc2hNYXA0wE5cEGzA+wIAAVoAC2FjY2Vzc09yZGVyeHIAEWphdmEudXRpbC5IYXNoTWFwBQfawcMWYNEDAAJGAApsb2FkRmFjdG9ySQAJdGhy
ZXNob2xkeHA/QAAAAAAAGHcIAAAAIAAAAA10AA5hY2NvdW50X251bWJlcn5yAClvcmcub3BlbnNlYXJjaC5zcWwuZGF0YS50eXBlLkV4cHJDb3JlVHlwZQAAAAAAAAAAEgAAeHIADmphdmEubGFuZy5FbnVtAAAAAAAAAAASAAB4cHQABExPTkd0AAlmaXJzdG5hbWV+cQB+AAt0AAZTVFJJTkd0AAdhZGRyZXNzc3IAOm9yZy5vcGVuc2VhcmNoLnNxbC5vcGVuc2VhcmNoLmRhdGEudHlwZS5PcGVuU2VhcmNoVGV4dFR5cGWtg6OTBOMxRAIAAUwABmZpZWxkc3QAD0xqYXZhL3V0aWwvTWFwO3hyADpvcmcub3BlbnNlYXJjaC5zcWwub3BlbnNlYXJjaC5kYXRhLnR5cGUuT3BlblNlYXJjaERhdGFUeXBlwmO8ygL6BTUCAANMAAxleHByQ29yZVR5cGV0ACtMb3JnL29wZW5zZWFyY2gvc3FsL2RhdGEvdHlwZS9FeHByQ29yZVR5cGU7TAALbWFwcGluZ1R5cGV0AEhMb3JnL29wZW5zZWFyY2gvc3FsL29wZW5zZWFyY2gvZGF0YS90eXBlL09wZW5TZWFyY2hEYXRhVHlwZSRNYXBwaW5nVHlwZTtMAApwcm9wZXJ0aWVzcQB+ABR4cH5xAH4AC3QAB1VOS05PV05+cgBGb3JnLm9wZW5zZWFyY2guc3FsLm9wZW5zZWFyY2guZGF0YS50eXBlLk9wZW5TZWFyY2hEYXRhVHlwZSRNYXBwaW5nVHlwZQAAAAAAAAAAEgAAeHEAfgAMdAAEVGV4dHNyADxzaGFkZWQuY29tLmdvb2dsZS5jb21tb24uY29sbGVjdC5JbW11dGFibGVNYXAkU2VyaWFsaXplZEZvcm0AAAAAAAAAAAIAAkwABGtleXN0ABJMamF2YS9sYW5nL09iamVjdDtMAAZ2YWx1ZXNxAH4AH3hwdXIAE1tMamF2YS5sYW5nLk9iamVjdDuQzlifEHMpbAIAAHhwAAAAAHVxAH4AIQAAAABzcQB+AAAAAAADdwQAAAAAeHQACWJpcnRoZGF0ZXNyADpvcmcub3BlbnNlYXJjaC5zcWwub3BlbnNlYXJjaC5kYXRhLnR5cGUuT3BlblNlYXJjaERhdGVUeXBlni1SrhB9yq8CAAFMAAdmb3JtYXRzdAAQTGphdmEvdXRpbC9MaXN0O3hxAH4AFX5xAH4AC3QACVRJTUVTVEFNUH5xAH4AG3QABERhdGVxAH4AIHNxAH4AAAAAAAF3BAAAAAB4dAAGZ2VuZGVyc3EAfgATcQB+ABlxAH4AHHEAfgAgc3EAfgAAAAAAA3cEAAAAAnQAB2tleXdvcmRzcQB+ABVxAH4AEH5xAH4AG3QAB0tleXdvcmRxAH4AIHh0AARjaXR5cQB+ABB0AAhsYXN0bmFtZXEAfgAQdAAHYmFsYW5jZXEAfgANdAAIZW1wbG95ZXJzcQB+ABNxAH4AGXEAfgAccQB+ACBxAH4AJHQABXN0YXRlc3EAfgATcQB+ABlxAH4AHHEAfgAgc3EAfgAAAAAAA3cEAAAAAnEAfgAxcQB+ADJ4dAADYWdlfnEAfgALdAAHSU5URUdFUnQABWVtYWlsc3EAfgATcQB+ABlxAH4AHHEAfgAgcQB+ACR0AARtYWxlfnEAfgALdAAHQk9PTEVBTngAeA==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0}},"missing_bucket":true,"missing_order":"first","order":"asc"}}}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, 
opensearch-sql_test_index_bank]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},count()=COUNT()), PROJECT->[count(), age_range], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"age_range":{"terms":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXNyABFqYXZhLnV0aWwuQ29sbFNlcleOq7Y6G6gRAwABSQADdGFneHAAAAADdwQAAAAGdAAHcm93VHlwZXQHrXsKICAiZmllbGRzIjogWwogICAgewogICAgICAidHlwZSI6ICJCSUdJTlQiLAogICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAibmFtZSI6ICJhY2NvdW50X251bWJlciIKICAgIH0sCiAgICB7CiAgICAgICJ0eXBlIjogIlZBUkNIQVIiLAogICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAicHJlY2lzaW9uIjogLTEsCiAgICAgICJuYW1lIjogImZpcnN0bmFtZSIKICAgIH0sCiAgICB7CiAgICAgICJ0eXBlIjogIlZBUkNIQVIiLAogICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAicHJlY2lzaW9uIjogLTEsCiAgICAgICJuYW1lIjogImFkZHJlc3MiCiAgICB9LAogICAgewogICAgICAidWR0IjogIkVYUFJfVElNRVNUQU1QIiwKICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICJwcmVjaXNpb24iOiAtMSwKICAgICAgIm5hbWUiOiAiYmlydGhkYXRlIgogICAgfSwKICAgIHsKICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICJwcmVjaXNpb24iOiAtMSwKICAgICAgIm5hbWUiOiAiZ2VuZGVyIgogICAgfSwKICAgIHsKICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICJwcmVjaXNpb24iOiAtMSwKICAgICAgIm5hbWUiOiAiY2l0eSIKICAgIH0sCiAgICB7CiAgICAgICJ0eXBlIjogIlZBUkNIQVIiLAogICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAicHJlY2lzaW9uIjogLTEsCiAgICAgICJuYW1lIjogImxhc3RuYW1lIgogICAgfSwKICAgIHsKICAgICAgInR5cGUiOiAiQklHSU5UIiwKICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgIm5hbWUiOiAiYmFsYW5jZSIKICAgIH0sCiAgICB7CiAgICAgICJ0eXBlIjogIlZBUkNIQVIiLAogICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAicHJlY2lzaW9uIjogLTEsCiAgICAgICJuYW1lIjogImVtcGxveWVyIgogICAgfSwKICAgIHsKICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICJwcmVjaXNpb24iOiAtMSwKICAgICAgIm5hbWUiOiAic3RhdGUiCiAgICB9LAogICAgewogICAgICAidHlwZSI6ICJJTlRFR0
VSIiwKICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgIm5hbWUiOiAiYWdlIgogICAgfSwKICAgIHsKICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICJwcmVjaXNpb24iOiAtMSwKICAgICAgIm5hbWUiOiAiZW1haWwiCiAgICB9LAogICAgewogICAgICAidHlwZSI6ICJCT09MRUFOIiwKICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgIm5hbWUiOiAibWFsZSIKICAgIH0sCiAgICB7CiAgICAgICJ0eXBlIjogIlZBUkNIQVIiLAogICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAicHJlY2lzaW9uIjogLTEsCiAgICAgICJuYW1lIjogIl9pZCIKICAgIH0sCiAgICB7CiAgICAgICJ0eXBlIjogIlZBUkNIQVIiLAogICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAicHJlY2lzaW9uIjogLTEsCiAgICAgICJuYW1lIjogIl9pbmRleCIKICAgIH0sCiAgICB7CiAgICAgICJ0eXBlIjogIlJFQUwiLAogICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAibmFtZSI6ICJfc2NvcmUiCiAgICB9LAogICAgewogICAgICAidHlwZSI6ICJSRUFMIiwKICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgIm5hbWUiOiAiX21heHNjb3JlIgogICAgfSwKICAgIHsKICAgICAgInR5cGUiOiAiQklHSU5UIiwKICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgIm5hbWUiOiAiX3NvcnQiCiAgICB9LAogICAgewogICAgICAidHlwZSI6ICJWQVJDSEFSIiwKICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgInByZWNpc2lvbiI6IC0xLAogICAgICAibmFtZSI6ICJfcm91dGluZyIKICAgIH0KICBdLAogICJudWxsYWJsZSI6IHRydWUKfXQABGV4cHJ0Ap97CiAgIm9wIjogewogICAgIm5hbWUiOiAiQ0FTRSIsCiAgICAia2luZCI6ICJDQVNFIiwKICAgICJzeW50YXgiOiAiU1BFQ0lBTCIKICB9LAogICJvcGVyYW5kcyI6IFsKICAgIHsKICAgICAgIm9wIjogewogICAgICAgICJuYW1lIjogIjwiLAogICAgICAgICJraW5kIjogIkxFU1NfVEhBTiIsCiAgICAgICAgInN5bnRheCI6ICJCSU5BUlkiCiAgICAgIH0sCiAgICAgICJvcGVyYW5kcyI6IFsKICAgICAgICB7CiAgICAgICAgICAiaW5wdXQiOiAxMCwKICAgICAgICAgICJuYW1lIjogIiQxMCIKICAgICAgICB9LAogICAgICAgIHsKICAgICAgICAgICJsaXRlcmFsIjogMzAsCiAgICAgICAgICAidHlwZSI6IHsKICAgICAgICAgICAgInR5cGUiOiAiSU5URUdFUiIsCiAgICAgICAgICAgICJudWxsYWJsZSI6IGZhbHNlCiAgICAgICAgICB9CiAgICAgICAgfQogICAgICBdCiAgICB9LAogICAgewogICAgICAibGl0ZXJhbCI6IDMwLAogICAgICAidHlwZSI6IHsKICAgICAgICAidHlwZSI6ICJJTlRFR0VSIiwKICAgICAgICAibnVsbGFibGUiOiBmYWxzZQogICAgICB9CiAgICB9LAogICAgewogICAgICAibGl0ZXJhbCI6IDEwMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiSU5URUdFUiIsCiAgICAgICAgIm51bGxhYmxlIjogZm
Fsc2UKICAgICAgfQogICAgfQogIF0KfXQACmZpZWxkVHlwZXNzcgARamF2YS51dGlsLkhhc2hNYXAFB9rBwxZg0QMAAkYACmxvYWRGYWN0b3JJAAl0aHJlc2hvbGR4cD9AAAAAAAAYdwgAAAAgAAAAE3QACF9yb3V0aW5nfnIAKW9yZy5vcGVuc2VhcmNoLnNxbC5kYXRhLnR5cGUuRXhwckNvcmVUeXBlAAAAAAAAAAASAAB4cgAOamF2YS5sYW5nLkVudW0AAAAAAAAAABIAAHhwdAAGU1RSSU5HdAAOYWNjb3VudF9udW1iZXJ+cQB+AAp0AARMT05HdAAJZmlyc3RuYW1lcQB+AAx0AAdhZGRyZXNzc3IAOm9yZy5vcGVuc2VhcmNoLnNxbC5vcGVuc2VhcmNoLmRhdGEudHlwZS5PcGVuU2VhcmNoVGV4dFR5cGWtg6OTBOMxRAIAAUwABmZpZWxkc3QAD0xqYXZhL3V0aWwvTWFwO3hyADpvcmcub3BlbnNlYXJjaC5zcWwub3BlbnNlYXJjaC5kYXRhLnR5cGUuT3BlblNlYXJjaERhdGFUeXBlwmO8ygL6BTUCAANMAAxleHByQ29yZVR5cGV0ACtMb3JnL29wZW5zZWFyY2gvc3FsL2RhdGEvdHlwZS9FeHByQ29yZVR5cGU7TAALbWFwcGluZ1R5cGV0AEhMb3JnL29wZW5zZWFyY2gvc3FsL29wZW5zZWFyY2gvZGF0YS90eXBlL09wZW5TZWFyY2hEYXRhVHlwZSRNYXBwaW5nVHlwZTtMAApwcm9wZXJ0aWVzcQB+ABR4cH5xAH4ACnQAB1VOS05PV05+cgBGb3JnLm9wZW5zZWFyY2guc3FsLm9wZW5zZWFyY2guZGF0YS50eXBlLk9wZW5TZWFyY2hEYXRhVHlwZSRNYXBwaW5nVHlwZQAAAAAAAAAAEgAAeHEAfgALdAAEVGV4dHNyADxzaGFkZWQuY29tLmdvb2dsZS5jb21tb24uY29sbGVjdC5JbW11dGFibGVNYXAkU2VyaWFsaXplZEZvcm0AAAAAAAAAAAIAAkwABGtleXN0ABJMamF2YS9sYW5nL09iamVjdDtMAAZ2YWx1ZXNxAH4AH3hwdXIAE1tMamF2YS5sYW5nLk9iamVjdDuQzlifEHMpbAIAAHhwAAAAAHVxAH4AIQAAAABzcQB+AAAAAAADdwQAAAAAeHQACWJpcnRoZGF0ZXNyADpvcmcub3BlbnNlYXJjaC5zcWwub3BlbnNlYXJjaC5kYXRhLnR5cGUuT3BlblNlYXJjaERhdGVUeXBlni1SrhB9yq8CAAFMAAdmb3JtYXRzdAAQTGphdmEvdXRpbC9MaXN0O3hxAH4AFX5xAH4ACnQACVRJTUVTVEFNUH5xAH4AG3QABERhdGVxAH4AIHNxAH4AAAAAAAF3BAAAAAB4dAAGZ2VuZGVyc3EAfgATcQB+ABlxAH4AHHEAfgAgc3EAfgAAAAAAA3cEAAAAAnQAB2tleXdvcmRzcQB+ABVxAH4ADH5xAH4AG3QAB0tleXdvcmRxAH4AIHh0AAZfaW5kZXhxAH4ADHQABGNpdHlxAH4ADHQACV9tYXhzY29yZX5xAH4ACnQABUZMT0FUdAAGX3Njb3JlcQB+ADh0AAVfc29ydHEAfgAPdAAIbGFzdG5hbWVxAH4ADHQAB2JhbGFuY2VxAH4AD3QACGVtcGxveWVyc3EAfgATcQB+ABlxAH4AHHEAfgAgcQB+ACR0AAVzdGF0ZXNxAH4AE3EAfgAZcQB+ABxxAH4AIHNxAH4AAAAAAAN3BAAAAAJxAH4AMXEAfgAyeHQAA19pZHEAfgAMdAADYWdlfnEAfgAKdAAHSU5URUdFUnQABWVtYWlsc3EAfgATcQB+ABlxAH4AHHEAfgAgcQB+ACR0AARtYWxlfnEAfgAKdAAHQk9PTEVBTnh4\"}","lang":"opensearch_compou
nded_script","params":{"utcTimestamp": 0}},"missing_bucket":true,"missing_order":"first","order":"asc"}}}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_stats_bins_on_time.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_stats_bins_on_time.yaml index b3f3f5aed9b..5c7850cdf5e 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_stats_bins_on_time.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_stats_bins_on_time.yaml @@ -7,5 +7,4 @@ calcite: CalciteLogicalIndexScan(table=[[OpenSearch, events]]) physical: | EnumerableLimit(fetch=[10000]) - EnumerableCalc(expr#0..1=[{inputs}], expr#2=[0], expr#3=[>($t1, $t2)], count()=[$t1], @timestamp=[$t0], $condition=[$t3]) - CalciteEnumerableIndexScan(table=[[OpenSearch, events]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},count()=COUNT())], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"@timestamp":{"auto_date_histogram":{"field":"@timestamp","buckets":3,"minimum_interval":null}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, events]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},count()=COUNT()), PROJECT->[count(), @timestamp]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"@timestamp":{"auto_date_histogram":{"field":"@timestamp","buckets":3,"minimum_interval":null}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_stats_bins_on_time2.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_stats_bins_on_time2.yaml index a0080e88f90..4efe38c96d1 100644 --- 
a/integ-test/src/test/resources/expectedOutput/calcite/explain_stats_bins_on_time2.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_stats_bins_on_time2.yaml @@ -7,5 +7,4 @@ calcite: CalciteLogicalIndexScan(table=[[OpenSearch, events]]) physical: | EnumerableLimit(fetch=[10000]) - EnumerableCalc(expr#0..1=[{inputs}], expr#2=[IS NOT NULL($t1)], avg(cpu_usage)=[$t1], @timestamp=[$t0], $condition=[$t2]) - CalciteEnumerableIndexScan(table=[[OpenSearch, events]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},avg(cpu_usage)=AVG($1))], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"@timestamp":{"auto_date_histogram":{"field":"@timestamp","buckets":3,"minimum_interval":null},"aggregations":{"avg(cpu_usage)":{"avg":{"field":"cpu_usage"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, events]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},avg(cpu_usage)=AVG($1)), PROJECT->[avg(cpu_usage), @timestamp]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"@timestamp":{"auto_date_histogram":{"field":"@timestamp","buckets":3,"minimum_interval":null},"aggregations":{"avg(cpu_usage)":{"avg":{"field":"cpu_usage"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/request/AggregateAnalyzer.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/request/AggregateAnalyzer.java index 7ec1f6cb432..d6b6a67b082 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/request/AggregateAnalyzer.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/request/AggregateAnalyzer.java @@ -213,71 +213,72 @@ public static Pair, OpenSearchAggregationResponseParser Builder newMetricBuilder = countAggNameAndBuilderPair.getRight(); List 
countAggNames = countAggNameAndBuilderPair.getLeft(); - // Used to track the current sub-builder as analysis progresses - Builder subBuilder = newMetricBuilder; - - // Push auto date span & case in group-by list into nested aggregations - Pair, AggregationBuilder> aggPushedAndAggBuilder = - createNestedAggregation(groupList, project, subBuilder, helper); - Set aggPushed = aggPushedAndAggBuilder.getLeft(); - AggregationBuilder pushedAggBuilder = aggPushedAndAggBuilder.getRight(); - // The group-by list after removing pushed aggregations - groupList = groupList.stream().filter(i -> !aggPushed.contains(i)).toList(); - if (pushedAggBuilder != null) { - subBuilder = new Builder().addAggregator(pushedAggBuilder); - } - // No group-by clause -- no parent aggregations are attached: // - stats count() // - stats avg(), count() // Metric if (aggregate.getGroupSet().isEmpty()) { - if (subBuilder == null) { + if (newMetricBuilder == null) { // The optimization must require all count aggregations are removed, // and they have only one field name return Pair.of(List.of(), new CountAsTotalHitsParser(countAggNames)); } else { return Pair.of( - ImmutableList.copyOf(subBuilder.getAggregatorFactories()), + ImmutableList.copyOf(newMetricBuilder.getAggregatorFactories()), new NoBucketAggregationParser(metricParsers)); } - } - // No composite aggregation at top-level - // - stats avg() by range_field - // - stats count() by auto_date_span - // - stats count() by ...auto_date_spans, ...range_fields - // [AutoDateHistogram | RangeAgg]+ - // Metric - else if (groupList.isEmpty()) { - return Pair.of( - ImmutableList.copyOf(subBuilder.getAggregatorFactories()), - new BucketAggregationParser(metricParsers, countAggNames)); - } - // Composite aggregation at top level -- it has composite aggregation, with or without its - // incompatible value sources as sub-aggregations - // - stats avg() by term_fields - // - stats avg() by date_histogram - // - stats count() by auto_date_span, range_field, 
term_fields - // CompositeAgg - // [AutoDateHistogram | RangeAgg]* - // Metric - else { - List> buckets = - createCompositeBuckets(groupList, project, helper); - if (buckets.size() != groupList.size()) { - throw new UnsupportedOperationException( - "Not all the left aggregations can be converted to value sources of composite" - + " aggregation"); + } else { + // Used to track the current sub-builder as analysis progresses + Builder subBuilder = newMetricBuilder; + // Push auto date span & case in group-by list into nested aggregations + Pair, AggregationBuilder> aggPushedAndAggBuilder = + createNestedAggregation(groupList, project, subBuilder, helper); + Set aggPushed = aggPushedAndAggBuilder.getLeft(); + AggregationBuilder pushedAggBuilder = aggPushedAndAggBuilder.getRight(); + // The group-by list after removing pushed aggregations + groupList = groupList.stream().filter(i -> !aggPushed.contains(i)).toList(); + if (pushedAggBuilder != null) { + subBuilder = new Builder().addAggregator(pushedAggBuilder); + } + + // No composite aggregation at top-level -- auto date span & case in group-by list are + // pushed into nested aggregations: + // - stats avg() by range_field + // - stats count() by auto_date_span + // - stats count() by ...auto_date_spans, ...range_fields + // [AutoDateHistogram | RangeAgg]+ + // Metric + if (groupList.isEmpty()) { + return Pair.of( + ImmutableList.copyOf(subBuilder.getAggregatorFactories()), + new BucketAggregationParser(metricParsers, countAggNames)); } - AggregationBuilder compositeBuilder = - AggregationBuilders.composite("composite_buckets", buckets) - .size(AGGREGATION_BUCKET_SIZE); - if (subBuilder != null) { - compositeBuilder.subAggregations(subBuilder); + // Composite aggregation at top level -- it has composite aggregation, with or without its + // incompatible value sources as sub-aggregations: + // - stats avg() by term_fields + // - stats avg() by date_histogram + // - stats count() by auto_date_span, range_field, 
term_fields + // CompositeAgg + // [AutoDateHistogram | RangeAgg]* + // Metric + else { + List> buckets = + createCompositeBuckets(groupList, project, helper); + if (buckets.size() != groupList.size()) { + throw new UnsupportedOperationException( + "Not all the left aggregations can be converted to value sources of composite" + + " aggregation"); + } + AggregationBuilder compositeBuilder = + AggregationBuilders.composite("composite_buckets", buckets) + .size(AGGREGATION_BUCKET_SIZE); + if (subBuilder != null) { + compositeBuilder.subAggregations(subBuilder); + } + return Pair.of( + Collections.singletonList(compositeBuilder), + new BucketAggregationParser(metricParsers, countAggNames)); } - return Pair.of( - Collections.singletonList(compositeBuilder), - new BucketAggregationParser(metricParsers, countAggNames)); } } catch (Throwable e) { Throwables.throwIfInstanceOf(e, UnsupportedOperationException.class); diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/response/agg/BucketAggregationParser.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/response/agg/BucketAggregationParser.java index 4abba2a3d2b..5fde477fd06 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/response/agg/BucketAggregationParser.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/response/agg/BucketAggregationParser.java @@ -9,6 +9,7 @@ import java.util.List; import java.util.Map; import java.util.Objects; +import java.util.Optional; import lombok.EqualsAndHashCode; import lombok.Getter; import org.opensearch.search.SearchHits; @@ -16,7 +17,9 @@ import org.opensearch.search.aggregations.Aggregations; import org.opensearch.search.aggregations.bucket.MultiBucketsAggregation; import org.opensearch.search.aggregations.bucket.composite.CompositeAggregation; +import org.opensearch.search.aggregations.bucket.histogram.InternalAutoDateHistogram; import org.opensearch.search.aggregations.bucket.range.Range; +import 
org.opensearch.search.aggregations.bucket.terms.ParsedStringTerms; /** * Use BucketAggregationParser only when there is a single group-by key, it returns multiple @@ -55,27 +58,21 @@ public List> parse(Aggregations aggregations) { private List> parseBucket( MultiBucketsAggregation.Bucket bucket, String name) { + // return null so that an empty bucket of range or date span will be filtered out + if (bucket instanceof Range.Bucket || bucket instanceof InternalAutoDateHistogram.Bucket) { + if (bucket.getDocCount() == 0) { + return null; + } + } + Aggregations aggregations = bucket.getAggregations(); List> results = isLeafAgg(aggregations) ? parseLeafAgg(aggregations, bucket.getDocCount()) : parse(aggregations); - if (bucket instanceof CompositeAggregation.Bucket compositeBucket) { - Map common = extract(compositeBucket); - for (Map r : results) { - r.putAll(common); - } - } else if (bucket instanceof Range.Bucket) { - // return null so that an empty range will be filtered out - if (bucket.getDocCount() == 0) { - return null; - } - // the content of the range bucket is extracted with `r.put(name, bucket.getKey())` below - } - for (Map r : results) { - r.put(name, bucket.getKey()); - } + Optional> common = extract(bucket, name); + common.ifPresent(commonMap -> results.forEach(r -> r.putAll(commonMap))); return results; } @@ -97,10 +94,11 @@ public List> parse(SearchHits hits) { } /** - * Extracts key-value pairs from a composite aggregation bucket without processing its + * Extracts key-value pairs from different types of aggregation buckets without processing their * sub-aggregations. * - *

For example, for the following CompositeAggregation bucket in response: + *

For CompositeAggregation buckets, it extracts all key-value pairs from the bucket's key. For + * example, for the following CompositeAggregation bucket in response: * *

{@code
    * {
@@ -114,12 +112,28 @@ public List> parse(SearchHits hits) {
    * }
    * }
* - * It returns {@code {"firstname": "William", "lastname": "Shakespeare"}} as the response. + * It returns {@code {"firstname": "William", "lastname": "Shakespeare"}}. * - * @param bucket the composite aggregation bucket to extract data from - * @return a map containing the bucket's key-value pairs + *

For Range buckets, it creates a single key-value pair using the provided name and the + * bucket's key. + * + * @param bucket the aggregation bucket to extract data from + * @param name the field name to use for range buckets (ignored for composite buckets) + * @return an Optional containing the extracted key-value pairs, or empty if bucket type is + * unsupported */ - protected Map extract(CompositeAggregation.Bucket bucket) { - return bucket.getKey(); + protected Optional> extract( + MultiBucketsAggregation.Bucket bucket, String name) { + Map extracted; + if (bucket instanceof CompositeAggregation.Bucket compositeBucket) { + extracted = compositeBucket.getKey(); + } else if (bucket instanceof Range.Bucket + || bucket instanceof InternalAutoDateHistogram.Bucket + || bucket instanceof ParsedStringTerms.ParsedBucket) { + extracted = Map.of(name, bucket.getKey()); + } else { + extracted = null; + } + return Optional.ofNullable(extracted); } } diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/CalciteLogicalIndexScan.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/CalciteLogicalIndexScan.java index f4fbc66d6d8..f7e134d3085 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/CalciteLogicalIndexScan.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/CalciteLogicalIndexScan.java @@ -9,7 +9,6 @@ import java.util.List; import java.util.Map; import java.util.Objects; -import java.util.Optional; import java.util.stream.Collectors; import lombok.Getter; import org.apache.calcite.plan.Convention; @@ -26,7 +25,6 @@ import org.apache.calcite.rel.core.Filter; import org.apache.calcite.rel.core.Project; import org.apache.calcite.rel.hint.RelHint; -import org.apache.calcite.rel.logical.LogicalFilter; import org.apache.calcite.rel.logical.LogicalSort; import org.apache.calcite.rel.type.RelDataType; import org.apache.calcite.rel.type.RelDataTypeFactory; @@ -34,13 +32,10 
@@ import org.apache.calcite.rex.RexBuilder; import org.apache.calcite.rex.RexNode; import org.apache.calcite.sql.fun.SqlStdOperatorTable; -import org.apache.calcite.sql.type.SqlTypeName; import org.apache.commons.lang3.tuple.Pair; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; import org.opensearch.search.aggregations.AggregationBuilder; -import org.opensearch.search.aggregations.bucket.histogram.AutoDateHistogramAggregationBuilder; -import org.opensearch.search.aggregations.metrics.ValueCountAggregationBuilder; import org.opensearch.sql.calcite.utils.OpenSearchTypeFactory; import org.opensearch.sql.common.setting.Settings; import org.opensearch.sql.data.type.ExprCoreType; @@ -309,24 +304,6 @@ public AbstractRelNode pushDownAggregate(Aggregate aggregate, Project project) { extendedTypeMapping, outputFields.subList(0, aggregate.getGroupSet().cardinality())); newScan.pushDownContext.add(PushDownType.AGGREGATION, aggregate, action); - if (aggregationBuilder.getLeft().size() == 1 - && aggregationBuilder.getLeft().getFirst() - instanceof AutoDateHistogramAggregationBuilder autoDateHistogram) { - // If it's auto_date_histogram, filter the empty bucket by using the first aggregate metrics - RexBuilder rexBuilder = getCluster().getRexBuilder(); - Optional aggBuilderOpt = - autoDateHistogram.getSubAggregations().stream().toList().stream().findFirst(); - RexNode condition = - aggBuilderOpt.isEmpty() || aggBuilderOpt.get() instanceof ValueCountAggregationBuilder - ? 
rexBuilder.makeCall( - SqlStdOperatorTable.GREATER_THAN, - rexBuilder.makeInputRef(newScan, 1), - rexBuilder.makeLiteral( - 0, rexBuilder.getTypeFactory().createSqlType(SqlTypeName.INTEGER))) - : rexBuilder.makeCall( - SqlStdOperatorTable.IS_NOT_NULL, rexBuilder.makeInputRef(newScan, 1)); - return LogicalFilter.create(newScan, condition); - } return newScan; } catch (Exception e) { if (LOG.isDebugEnabled()) { From 91aaee87252feb8e676f1db2b49c011017934a1a Mon Sep 17 00:00:00 2001 From: Yuanchun Shen Date: Thu, 16 Oct 2025 10:27:24 +0800 Subject: [PATCH 25/26] Ignore testNestedAggregationsExplain when pushdown is disabled Signed-off-by: Yuanchun Shen --- .../org/opensearch/sql/calcite/remote/CalciteExplainIT.java | 6 ++++-- .../sql/calcite/remote/CalcitePPLCaseFunctionIT.java | 6 ++++++ 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java index 4de7807e476..8248340a93a 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java @@ -1230,8 +1230,10 @@ public void testCasePushdownAsRangeQueryExplain() throws IOException { @Test public void testNestedAggregationsExplain() throws IOException { - // the query runs into error when pushdown is disabled due to bin's implementation - Assume.assumeFalse(isPushdownDisabled()); + // TODO: Remove after resolving: https://github.com/opensearch-project/sql/issues/4578 + Assume.assumeFalse( + "The query runs into error when pushdown is disabled due to bin's implementation", + isPushdownDisabled()); assertYamlEqualsJsonIgnoreId( loadExpectedPlan("agg_composite_autodate_range_metric_push.yaml"), explainQueryToString( diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLCaseFunctionIT.java 
b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLCaseFunctionIT.java index d0b253500c5..b7e16d1da8b 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLCaseFunctionIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLCaseFunctionIT.java @@ -18,6 +18,7 @@ import java.io.IOException; import org.json.JSONObject; +import org.junit.Assume; import org.junit.jupiter.api.Test; import org.opensearch.client.Request; import org.opensearch.sql.legacy.TestsConstants; @@ -476,6 +477,11 @@ public void testCaseAggWithNullValues() throws IOException { @Test public void testNestedCaseAggWithAutoDateHistogram() throws IOException { + // TODO: Remove after resolving: https://github.com/opensearch-project/sql/issues/4578 + Assume.assumeFalse( + "The query cannot be executed when pushdown is disabled due to implementation defects of" + + " the bin command", + isPushdownDisabled()); JSONObject actual1 = executeQuery( String.format( From 0a5a55bca0fb509678c10896b1268eff218bbb38 Mon Sep 17 00:00:00 2001 From: Yuanchun Shen Date: Wed, 22 Oct 2025 13:45:43 +0800 Subject: [PATCH 26/26] Fix explain ITs after merge Signed-off-by: Yuanchun Shen --- .../sql/calcite/remote/CalciteExplainIT.java | 48 +++++++++---------- .../calcite/agg_case_cannot_push.yaml | 2 +- .../agg_case_composite_cannot_push.yaml | 2 +- .../calcite/agg_case_num_res_cannot_push.yaml | 2 +- 4 files changed, 27 insertions(+), 27 deletions(-) diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java index ecacec9f425..15087d5d010 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java @@ -1160,27 +1160,27 @@ public void testPushDownMinOrMaxAggOnDerivedField() throws IOException { public void 
testCasePushdownAsRangeQueryExplain() throws IOException { // CASE 1: Range - Metric // 1.1 Range - Metric - assertYamlEqualsJsonIgnoreId( + assertYamlEqualsIgnoreId( loadExpectedPlan("agg_range_metric_push.yaml"), - explainQueryToString( + explainQueryYaml( String.format( "source=%s | eval age_range = case(age < 30, 'u30', age < 40, 'u40' else 'u100') |" + " stats avg(age) as avg_age by age_range", TEST_INDEX_BANK))); // 1.2 Range - Metric (COUNT) - assertYamlEqualsJsonIgnoreId( + assertYamlEqualsIgnoreId( loadExpectedPlan("agg_range_count_push.yaml"), - explainQueryToString( + explainQueryYaml( String.format( "source=%s | eval age_range = case(age < 30, 'u30', age >= 30 and age < 40, 'u40'" + " else 'u100') | stats avg(age) by age_range", TEST_INDEX_BANK))); // 1.3 Range - Range - Metric - assertYamlEqualsJsonIgnoreId( + assertYamlEqualsIgnoreId( loadExpectedPlan("agg_range_range_metric_push.yaml"), - explainQueryToString( + explainQueryYaml( String.format( "source=%s | eval age_range = case(age < 30, 'u30', age < 40, 'u40' else 'u100')," + " balance_range = case(balance < 20000, 'medium' else 'high') | stats" @@ -1188,18 +1188,18 @@ public void testCasePushdownAsRangeQueryExplain() throws IOException { TEST_INDEX_BANK))); // 1.4 Range - Metric (With null & discontinuous ranges) - assertYamlEqualsJsonIgnoreId( + assertYamlEqualsIgnoreId( loadExpectedPlan("agg_range_metric_complex_push.yaml"), - explainQueryToString( + explainQueryYaml( String.format( "source=%s | eval age_range = case(age < 30, 'u30', (age >= 35 and age < 40) or age" + " >= 80, '30-40 or >=80') | stats avg(balance) by age_range", TEST_INDEX_BANK))); // 1.5 Should not be pushed because the range is not closed-open - assertYamlEqualsJsonIgnoreId( + assertYamlEqualsIgnoreId( loadExpectedPlan("agg_case_cannot_push.yaml"), - explainQueryToString( + explainQueryYaml( String.format( "source=%s | eval age_range = case(age < 30, 'u30', age >= 30 and age <= 40, 'u40'" + " else 'u100') | stats avg(age) as 
avg_age by age_range", @@ -1208,9 +1208,9 @@ public void testCasePushdownAsRangeQueryExplain() throws IOException { // 1.6 Should not be pushed as range query because the result expression is not a string // literal. // Range aggregation keys must be strings - assertYamlEqualsJsonIgnoreId( + assertYamlEqualsIgnoreId( loadExpectedPlan("agg_case_num_res_cannot_push.yaml"), - explainQueryToString( + explainQueryYaml( String.format( "source=%s | eval age_range = case(age < 30, 30 else 100) | stats count() by" + " age_range", @@ -1218,35 +1218,35 @@ public void testCasePushdownAsRangeQueryExplain() throws IOException { // CASE 2: Composite - Range - Metric // 2.1 Composite (term) - Range - Metric - assertYamlEqualsJsonIgnoreId( + assertYamlEqualsIgnoreId( loadExpectedPlan("agg_composite_range_metric_push.yaml"), - explainQueryToString( + explainQueryYaml( String.format( "source=%s | eval age_range = case(age < 30, 'u30' else 'a30') | stats avg(balance)" + " by state, age_range", TEST_INDEX_BANK))); // 2.2 Composite (date histogram) - Range - Metric - assertYamlEqualsJsonIgnoreId( + assertYamlEqualsIgnoreId( loadExpectedPlan("agg_composite_date_range_push.yaml"), - explainQueryToString( + explainQueryYaml( "source=opensearch-sql_test_index_time_data | eval value_range = case(value < 7000," + " 'small' else 'large') | stats avg(value) by value_range, span(@timestamp," + " 1h)")); // 2.3 Composite(2 fields) - Range - Metric (with count) - assertYamlEqualsJsonIgnoreId( + assertYamlEqualsIgnoreId( loadExpectedPlan("agg_composite2_range_count_push.yaml"), - explainQueryToString( + explainQueryYaml( String.format( "source=%s | eval age_range = case(age < 30, 'u30' else 'a30') | stats" + " avg(balance), count() by age_range, state, gender", TEST_INDEX_BANK))); // 2.4 Composite (2 fields) - Range - Range - Metric (with count) - assertYamlEqualsJsonIgnoreId( + assertYamlEqualsIgnoreId( loadExpectedPlan("agg_composite2_range_range_count_push.yaml"), - explainQueryToString( + 
explainQueryYaml( String.format( "source=%s | eval age_range = case(age < 35, 'u35' else 'a35'), balance_range =" + " case(balance < 20000, 'medium' else 'high') | stats avg(balance) as" @@ -1254,9 +1254,9 @@ public void testCasePushdownAsRangeQueryExplain() throws IOException { TEST_INDEX_BANK))); // 2.5 Should not be pushed down as range query because case result expression is not constant - assertYamlEqualsJsonIgnoreId( + assertYamlEqualsIgnoreId( loadExpectedPlan("agg_case_composite_cannot_push.yaml"), - explainQueryToString( + explainQueryYaml( String.format( "source=%s | eval age_range = case(age < 35, 'u35' else email) | stats avg(balance)" + " as avg_balance by age_range, state", @@ -1269,9 +1269,9 @@ public void testNestedAggregationsExplain() throws IOException { Assume.assumeFalse( "The query runs into error when pushdown is disabled due to bin's implementation", isPushdownDisabled()); - assertYamlEqualsJsonIgnoreId( + assertYamlEqualsIgnoreId( loadExpectedPlan("agg_composite_autodate_range_metric_push.yaml"), - explainQueryToString( + explainQueryYaml( String.format( "source=%s | bin timestamp bins=3 | eval value_range = case(value < 7000, 'small'" + " else 'great') | stats bucket_nullable=false avg(value), count() by" diff --git a/integ-test/src/test/resources/expectedOutput/calcite/agg_case_cannot_push.yaml b/integ-test/src/test/resources/expectedOutput/calcite/agg_case_cannot_push.yaml index 0198a44e025..d04bbd2df44 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/agg_case_cannot_push.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/agg_case_cannot_push.yaml @@ -6,4 +6,4 @@ calcite: LogicalProject(age_range=[CASE(<($10, 30), 'u30':VARCHAR, SEARCH($10, Sarg[[30..40]]), 'u40':VARCHAR, 'u100':VARCHAR)], age=[$10]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], 
PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},avg_age=AVG($1)), PROJECT->[avg_age, age_range], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"age_range":{"terms":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXNyABFqYXZhLnV0aWwuQ29sbFNlcleOq7Y6G6gRAwABSQADdGFneHAAAAADdwQAAAAGdAAHcm93VHlwZXQHrXsKICAiZmllbGRzIjogWwogICAgewogICAgICAidHlwZSI6ICJCSUdJTlQiLAogICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAibmFtZSI6ICJhY2NvdW50X251bWJlciIKICAgIH0sCiAgICB7CiAgICAgICJ0eXBlIjogIlZBUkNIQVIiLAogICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAicHJlY2lzaW9uIjogLTEsCiAgICAgICJuYW1lIjogImZpcnN0bmFtZSIKICAgIH0sCiAgICB7CiAgICAgICJ0eXBlIjogIlZBUkNIQVIiLAogICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAicHJlY2lzaW9uIjogLTEsCiAgICAgICJuYW1lIjogImFkZHJlc3MiCiAgICB9LAogICAgewogICAgICAidWR0IjogIkVYUFJfVElNRVNUQU1QIiwKICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICJwcmVjaXNpb24iOiAtMSwKICAgICAgIm5hbWUiOiAiYmlydGhkYXRlIgogICAgfSwKICAgIHsKICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICJwcmVjaXNpb24iOiAtMSwKICAgICAgIm5hbWUiOiAiZ2VuZGVyIgogICAgfSwKICAgIHsKICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICJwcmVjaXNpb24iOiAtMSwKICAgICAgIm5hbWUiOiAiY2l0eSIKICAgIH0sCiAgICB7CiAgICAgICJ0eXBlIjogIlZBUkNIQVIiLAogICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAicHJlY2lzaW9uIjogLTEsCiAgICAgICJuYW1lIjogImxhc3RuYW1lIgogICAgfSwKICAgIHsKICAgICAgInR5cGUiOiAiQklHSU5UIiwKICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgIm5hbWUiOiAiYmFsYW5jZSIKICAgIH0sCiAgICB7CiAgICAgICJ0eXBlIjogIlZBUkNIQVIiLAogICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAicHJlY2lzaW9uIjogLTEsCiAgICAgICJuYW1lIjogImVtcGxveWVyIgogICAgfSwKICAgIHsKICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICJwcmVjaXNpb24iOiAtMSwKICAgICAgIm5hbWUiOiAic3RhdGUiCiAgICB9LAogICAgewogICAgICAidHlwZSI6ICJJTlRFR0VSIiwKICAgICAgIm51bGxhYmxlIjogdHJ1
ZSwKICAgICAgIm5hbWUiOiAiYWdlIgogICAgfSwKICAgIHsKICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICJwcmVjaXNpb24iOiAtMSwKICAgICAgIm5hbWUiOiAiZW1haWwiCiAgICB9LAogICAgewogICAgICAidHlwZSI6ICJCT09MRUFOIiwKICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgIm5hbWUiOiAibWFsZSIKICAgIH0sCiAgICB7CiAgICAgICJ0eXBlIjogIlZBUkNIQVIiLAogICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAicHJlY2lzaW9uIjogLTEsCiAgICAgICJuYW1lIjogIl9pZCIKICAgIH0sCiAgICB7CiAgICAgICJ0eXBlIjogIlZBUkNIQVIiLAogICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAicHJlY2lzaW9uIjogLTEsCiAgICAgICJuYW1lIjogIl9pbmRleCIKICAgIH0sCiAgICB7CiAgICAgICJ0eXBlIjogIlJFQUwiLAogICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAibmFtZSI6ICJfc2NvcmUiCiAgICB9LAogICAgewogICAgICAidHlwZSI6ICJSRUFMIiwKICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgIm5hbWUiOiAiX21heHNjb3JlIgogICAgfSwKICAgIHsKICAgICAgInR5cGUiOiAiQklHSU5UIiwKICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgIm5hbWUiOiAiX3NvcnQiCiAgICB9LAogICAgewogICAgICAidHlwZSI6ICJWQVJDSEFSIiwKICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgInByZWNpc2lvbiI6IC0xLAogICAgICAibmFtZSI6ICJfcm91dGluZyIKICAgIH0KICBdLAogICJudWxsYWJsZSI6IHRydWUKfXQABGV4cHJ0BXp7CiAgIm9wIjogewogICAgIm5hbWUiOiAiQ0FTRSIsCiAgICAia2luZCI6ICJDQVNFIiwKICAgICJzeW50YXgiOiAiU1BFQ0lBTCIKICB9LAogICJvcGVyYW5kcyI6IFsKICAgIHsKICAgICAgIm9wIjogewogICAgICAgICJuYW1lIjogIjwiLAogICAgICAgICJraW5kIjogIkxFU1NfVEhBTiIsCiAgICAgICAgInN5bnRheCI6ICJCSU5BUlkiCiAgICAgIH0sCiAgICAgICJvcGVyYW5kcyI6IFsKICAgICAgICB7CiAgICAgICAgICAiaW5wdXQiOiAxMCwKICAgICAgICAgICJuYW1lIjogIiQxMCIKICAgICAgICB9LAogICAgICAgIHsKICAgICAgICAgICJsaXRlcmFsIjogMzAsCiAgICAgICAgICAidHlwZSI6IHsKICAgICAgICAgICAgInR5cGUiOiAiSU5URUdFUiIsCiAgICAgICAgICAgICJudWxsYWJsZSI6IGZhbHNlCiAgICAgICAgICB9CiAgICAgICAgfQogICAgICBdCiAgICB9LAogICAgewogICAgICAibGl0ZXJhbCI6ICJ1MzAiLAogICAgICAidHlwZSI6IHsKICAgICAgICAidHlwZSI6ICJWQVJDSEFSIiwKICAgICAgICAibnVsbGFibGUiOiBmYWxzZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfSwKICAgIHsKICAgICAgIm9wIjogewogICAgICAgICJuYW1lIjogIlNFQVJDSCIsCiAgICAgICAgImtpbmQiOiAiU0VBUkNIIiwKICAgICAgICAic3ludGF4Ijog
IklOVEVSTkFMIgogICAgICB9LAogICAgICAib3BlcmFuZHMiOiBbCiAgICAgICAgewogICAgICAgICAgImlucHV0IjogMTAsCiAgICAgICAgICAibmFtZSI6ICIkMTAiCiAgICAgICAgfSwKICAgICAgICB7CiAgICAgICAgICAibGl0ZXJhbCI6IHsKICAgICAgICAgICAgInJhbmdlU2V0IjogWwogICAgICAgICAgICAgIFsKICAgICAgICAgICAgICAgICJjbG9zZWQiLAogICAgICAgICAgICAgICAgIjMwIiwKICAgICAgICAgICAgICAgICI0MCIKICAgICAgICAgICAgICBdCiAgICAgICAgICAgIF0sCiAgICAgICAgICAgICJudWxsQXMiOiAiVU5LTk9XTiIKICAgICAgICAgIH0sCiAgICAgICAgICAidHlwZSI6IHsKICAgICAgICAgICAgInR5cGUiOiAiSU5URUdFUiIsCiAgICAgICAgICAgICJudWxsYWJsZSI6IGZhbHNlCiAgICAgICAgICB9CiAgICAgICAgfQogICAgICBdCiAgICB9LAogICAgewogICAgICAibGl0ZXJhbCI6ICJ1NDAiLAogICAgICAidHlwZSI6IHsKICAgICAgICAidHlwZSI6ICJWQVJDSEFSIiwKICAgICAgICAibnVsbGFibGUiOiBmYWxzZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfSwKICAgIHsKICAgICAgImxpdGVyYWwiOiAidTEwMCIsCiAgICAgICJ0eXBlIjogewogICAgICAgICJ0eXBlIjogIlZBUkNIQVIiLAogICAgICAgICJudWxsYWJsZSI6IGZhbHNlLAogICAgICAgICJwcmVjaXNpb24iOiAtMQogICAgICB9CiAgICB9CiAgXQp9dAAKZmllbGRUeXBlc3NyABFqYXZhLnV0aWwuSGFzaE1hcAUH2sHDFmDRAwACRgAKbG9hZEZhY3RvckkACXRocmVzaG9sZHhwP0AAAAAAABh3CAAAACAAAAATdAAIX3JvdXRpbmd+cgApb3JnLm9wZW5zZWFyY2guc3FsLmRhdGEudHlwZS5FeHByQ29yZVR5cGUAAAAAAAAAABIAAHhyAA5qYXZhLmxhbmcuRW51bQAAAAAAAAAAEgAAeHB0AAZTVFJJTkd0AA5hY2NvdW50X251bWJlcn5xAH4ACnQABExPTkd0AAlmaXJzdG5hbWVxAH4ADHQAB2FkZHJlc3NzcgA6b3JnLm9wZW5zZWFyY2guc3FsLm9wZW5zZWFyY2guZGF0YS50eXBlLk9wZW5TZWFyY2hUZXh0VHlwZa2Do5ME4zFEAgABTAAGZmllbGRzdAAPTGphdmEvdXRpbC9NYXA7eHIAOm9yZy5vcGVuc2VhcmNoLnNxbC5vcGVuc2VhcmNoLmRhdGEudHlwZS5PcGVuU2VhcmNoRGF0YVR5cGXCY7zKAvoFNQIAA0wADGV4cHJDb3JlVHlwZXQAK0xvcmcvb3BlbnNlYXJjaC9zcWwvZGF0YS90eXBlL0V4cHJDb3JlVHlwZTtMAAttYXBwaW5nVHlwZXQASExvcmcvb3BlbnNlYXJjaC9zcWwvb3BlbnNlYXJjaC9kYXRhL3R5cGUvT3BlblNlYXJjaERhdGFUeXBlJE1hcHBpbmdUeXBlO0wACnByb3BlcnRpZXNxAH4AFHhwfnEAfgAKdAAHVU5LTk9XTn5yAEZvcmcub3BlbnNlYXJjaC5zcWwub3BlbnNlYXJjaC5kYXRhLnR5cGUuT3BlblNlYXJjaERhdGFUeXBlJE1hcHBpbmdUeXBlAAAAAAAAAAASAAB4cQB+AAt0AARUZXh0c3IAPHNoYWRlZC5jb20uZ29vZ2xlLmNvbW1vbi5jb2xsZWN0LkltbXV0YWJsZU1hcCRTZXJpYWxpemVk
Rm9ybQAAAAAAAAAAAgACTAAEa2V5c3QAEkxqYXZhL2xhbmcvT2JqZWN0O0wABnZhbHVlc3EAfgAfeHB1cgATW0xqYXZhLmxhbmcuT2JqZWN0O5DOWJ8QcylsAgAAeHAAAAAAdXEAfgAhAAAAAHNxAH4AAAAAAAN3BAAAAAB4dAAJYmlydGhkYXRlc3IAOm9yZy5vcGVuc2VhcmNoLnNxbC5vcGVuc2VhcmNoLmRhdGEudHlwZS5PcGVuU2VhcmNoRGF0ZVR5cGWeLVKuEH3KrwIAAUwAB2Zvcm1hdHN0ABBMamF2YS91dGlsL0xpc3Q7eHEAfgAVfnEAfgAKdAAJVElNRVNUQU1QfnEAfgAbdAAERGF0ZXEAfgAgc3EAfgAAAAAAAXcEAAAAAHh0AAZnZW5kZXJzcQB+ABNxAH4AGXEAfgAccQB+ACBzcQB+AAAAAAADdwQAAAACdAAHa2V5d29yZHNxAH4AFXEAfgAMfnEAfgAbdAAHS2V5d29yZHEAfgAgeHQABl9pbmRleHEAfgAMdAAEY2l0eXEAfgAMdAAJX21heHNjb3JlfnEAfgAKdAAFRkxPQVR0AAZfc2NvcmVxAH4AOHQABV9zb3J0cQB+AA90AAhsYXN0bmFtZXEAfgAMdAAHYmFsYW5jZXEAfgAPdAAIZW1wbG95ZXJzcQB+ABNxAH4AGXEAfgAccQB+ACBxAH4AJHQABXN0YXRlc3EAfgATcQB+ABlxAH4AHHEAfgAgc3EAfgAAAAAAA3cEAAAAAnEAfgAxcQB+ADJ4dAADX2lkcQB+AAx0AANhZ2V+cQB+AAp0AAdJTlRFR0VSdAAFZW1haWxzcQB+ABNxAH4AGXEAfgAccQB+ACBxAH4AJHQABG1hbGV+cQB+AAp0AAdCT09MRUFOeHg=\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0}},"missing_bucket":true,"missing_order":"first","order":"asc"}}}]},"aggregations":{"avg_age":{"avg":{"field":"age"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},avg_age=AVG($1)), PROJECT->[avg_age, age_range], LIMIT->10000], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"age_range":{"terms":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXNyABFqYXZhLnV0aWwuQ29sbFNlcleOq7Y6G6gRAwABSQADdGFneHAAAAADdwQAAAAGdAAHcm93VHlwZXQAe3sKICAiZmllbGRzIjogWwogICAgewogICAgICAidHlwZSI6ICJJTlRFR0VSIiwKICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgIm5hbWUiOiAiYWdlIgogICAgfQogIF0sCiAgIm51bGxhYmxlIjogZmFsc2UKfXQABGV4cHJ0BXZ7CiAgIm9wIjogewogICAgIm5hbWUiOiAiQ0FTRSIsCiAgICAia2luZCI6ICJDQVNFIiwKICAgICJzeW50YXgiOiAiU1BFQ0lBTCIKICB9LAogICJvcGVyYW5kcyI6IFsKICAgIHsKICAgICAgIm9wIjogewogICAgICAgICJuYW1lIjogIjwiLAogICAgICAgICJraW5kIjogIkxFU1NfVEhBTiIsCiAgICAgICAgInN5bnRheCI6ICJCSU5BUlkiCiAgICAgIH0sCiAgICAgICJvcGVyYW5kcyI6IFsKICAgICAgICB7CiAgICAgICAgICAiaW5wdXQiOiAwLAogICAgICAgICAgIm5hbWUiOiAiJDAiCiAgICAgICAgfSwKICAgICAgICB7CiAgICAgICAgICAibGl0ZXJhbCI6IDMwLAogICAgICAgICAgInR5cGUiOiB7CiAgICAgICAgICAgICJ0eXBlIjogIklOVEVHRVIiLAogICAgICAgICAgICAibnVsbGFibGUiOiBmYWxzZQogICAgICAgICAgfQogICAgICAgIH0KICAgICAgXQogICAgfSwKICAgIHsKICAgICAgImxpdGVyYWwiOiAidTMwIiwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogZmFsc2UsCiAgICAgICAgInByZWNpc2lvbiI6IC0xCiAgICAgIH0KICAgIH0sCiAgICB7CiAgICAgICJvcCI6IHsKICAgICAgICAibmFtZSI6ICJTRUFSQ0giLAogICAgICAgICJraW5kIjogIlNFQVJDSCIsCiAgICAgICAgInN5bnRheCI6ICJJTlRFUk5BTCIKICAgICAgfSwKICAgICAgIm9wZXJhbmRzIjogWwogICAgICAgIHsKICAgICAgICAgICJpbnB1dCI6IDAsCiAgICAgICAgICAibmFtZSI6ICIkMCIKICAgICAgICB9LAogICAgICAgIHsKICAgICAgICAgICJsaXRlcmFsIjogewogICAgICAgICAgICAicmFuZ2VTZXQiOiBbCiAgICAgICAgICAgICAgWwogICAgICAgICAgICAgICAgImNsb3NlZCIsCiAgICAgICAgICAgICAgICAiMzAiLAogICAgICAgICAgICAgICAgIjQwIgogICAgICAgICAgICAgIF0KICAgICAgICAgICAgXSwKICAgICAgICAgICAgIm51bGxBcyI6ICJVTktOT1dOIgogICAgICAgICAgfSwKICAgICAgICAgICJ0eXBlIjogewogICAgICAgICAgICAidHlwZSI6ICJJTlRFR0VSIiwKICAgICAgICAgICAgIm51bGxhYmxlIjogZmFsc2UKICAgICAgICAgIH0KICAgICAgICB9CiAgICAgIF0KICAgIH0sCiAgICB7CiAgICAgICJs
aXRlcmFsIjogInU0MCIsCiAgICAgICJ0eXBlIjogewogICAgICAgICJ0eXBlIjogIlZBUkNIQVIiLAogICAgICAgICJudWxsYWJsZSI6IGZhbHNlLAogICAgICAgICJwcmVjaXNpb24iOiAtMQogICAgICB9CiAgICB9LAogICAgewogICAgICAibGl0ZXJhbCI6ICJ1MTAwIiwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogZmFsc2UsCiAgICAgICAgInByZWNpc2lvbiI6IC0xCiAgICAgIH0KICAgIH0KICBdCn10AApmaWVsZFR5cGVzc3IAEWphdmEudXRpbC5IYXNoTWFwBQfawcMWYNEDAAJGAApsb2FkRmFjdG9ySQAJdGhyZXNob2xkeHA/QAAAAAAADHcIAAAAEAAAAAF0AANhZ2V+cgApb3JnLm9wZW5zZWFyY2guc3FsLmRhdGEudHlwZS5FeHByQ29yZVR5cGUAAAAAAAAAABIAAHhyAA5qYXZhLmxhbmcuRW51bQAAAAAAAAAAEgAAeHB0AAdJTlRFR0VSeHg=\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0}},"missing_bucket":true,"missing_order":"first","order":"asc"}}}]},"aggregations":{"avg_age":{"avg":{"field":"age"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/agg_case_composite_cannot_push.yaml b/integ-test/src/test/resources/expectedOutput/calcite/agg_case_composite_cannot_push.yaml index 833c6a16a12..82cbadeb735 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/agg_case_composite_cannot_push.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/agg_case_composite_cannot_push.yaml @@ -6,4 +6,4 @@ calcite: LogicalProject(age_range=[CASE(<($10, 35), 'u35':VARCHAR, $11)], state=[$9], balance=[$7]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1},avg_balance=AVG($2)), PROJECT->[avg_balance, age_range, state], LIMIT->10000], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"age_range":{"terms":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXNyABFqYXZhLnV0aWwuQ29sbFNlcleOq7Y6G6gRAwABSQADdGFneHAAAAADdwQAAAAGdAAHcm93VHlwZXQHrXsKICAiZmllbGRzIjogWwogICAgewogICAgICAidHlwZSI6ICJCSUdJTlQiLAogICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAibmFtZSI6ICJhY2NvdW50X251bWJlciIKICAgIH0sCiAgICB7CiAgICAgICJ0eXBlIjogIlZBUkNIQVIiLAogICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAicHJlY2lzaW9uIjogLTEsCiAgICAgICJuYW1lIjogImZpcnN0bmFtZSIKICAgIH0sCiAgICB7CiAgICAgICJ0eXBlIjogIlZBUkNIQVIiLAogICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAicHJlY2lzaW9uIjogLTEsCiAgICAgICJuYW1lIjogImFkZHJlc3MiCiAgICB9LAogICAgewogICAgICAidWR0IjogIkVYUFJfVElNRVNUQU1QIiwKICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICJwcmVjaXNpb24iOiAtMSwKICAgICAgIm5hbWUiOiAiYmlydGhkYXRlIgogICAgfSwKICAgIHsKICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICJwcmVjaXNpb24iOiAtMSwKICAgICAgIm5hbWUiOiAiZ2VuZGVyIgogICAgfSwKICAgIHsKICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICJwcmVjaXNpb24iOiAtMSwKICAgICAgIm5hbWUiOiAiY2l0eSIKICAgIH0sCiAgICB7CiAgICAgICJ0eXBlIjogIlZBUkNIQVIiLAogICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAicHJlY2lzaW9uIjogLTEsCiAgICAgICJuYW1lIjogImxhc3RuYW1lIgogICAgfSwKICAgIHsKICAgICAgInR5cGUiOiAiQklHSU5UIiwKICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgIm5hbWUiOiAiYmFsYW5jZSIKICAgIH0sCiAgICB7CiAgICAgICJ0eXBlIjogIlZBUkNIQVIiLAogICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAicHJlY2lzaW9uIjogLTEsCiAgICAgICJuYW1lIjogImVtcGxveWVyIgogICAgfSwKICAgIHsKICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICJwcmVjaXNpb24iOiAtMSwKICAgICAgIm5hbWUiOiAic3RhdGUiCiAgICB9LAogICAgewogICAgICAidHlwZSI6ICJJTlRFR0VSIiwKICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgIm5hbWUiOiAiYWdlIgogICAgfSwKICAgIHsKICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICJwcmVjaXNpb24iOiAtMSwKICAgICAg
Im5hbWUiOiAiZW1haWwiCiAgICB9LAogICAgewogICAgICAidHlwZSI6ICJCT09MRUFOIiwKICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgIm5hbWUiOiAibWFsZSIKICAgIH0sCiAgICB7CiAgICAgICJ0eXBlIjogIlZBUkNIQVIiLAogICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAicHJlY2lzaW9uIjogLTEsCiAgICAgICJuYW1lIjogIl9pZCIKICAgIH0sCiAgICB7CiAgICAgICJ0eXBlIjogIlZBUkNIQVIiLAogICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAicHJlY2lzaW9uIjogLTEsCiAgICAgICJuYW1lIjogIl9pbmRleCIKICAgIH0sCiAgICB7CiAgICAgICJ0eXBlIjogIlJFQUwiLAogICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAibmFtZSI6ICJfc2NvcmUiCiAgICB9LAogICAgewogICAgICAidHlwZSI6ICJSRUFMIiwKICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgIm5hbWUiOiAiX21heHNjb3JlIgogICAgfSwKICAgIHsKICAgICAgInR5cGUiOiAiQklHSU5UIiwKICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgIm5hbWUiOiAiX3NvcnQiCiAgICB9LAogICAgewogICAgICAidHlwZSI6ICJWQVJDSEFSIiwKICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgInByZWNpc2lvbiI6IC0xLAogICAgICAibmFtZSI6ICJfcm91dGluZyIKICAgIH0KICBdLAogICJudWxsYWJsZSI6IHRydWUKfXQABGV4cHJ0An97CiAgIm9wIjogewogICAgIm5hbWUiOiAiQ0FTRSIsCiAgICAia2luZCI6ICJDQVNFIiwKICAgICJzeW50YXgiOiAiU1BFQ0lBTCIKICB9LAogICJvcGVyYW5kcyI6IFsKICAgIHsKICAgICAgIm9wIjogewogICAgICAgICJuYW1lIjogIjwiLAogICAgICAgICJraW5kIjogIkxFU1NfVEhBTiIsCiAgICAgICAgInN5bnRheCI6ICJCSU5BUlkiCiAgICAgIH0sCiAgICAgICJvcGVyYW5kcyI6IFsKICAgICAgICB7CiAgICAgICAgICAiaW5wdXQiOiAxMCwKICAgICAgICAgICJuYW1lIjogIiQxMCIKICAgICAgICB9LAogICAgICAgIHsKICAgICAgICAgICJsaXRlcmFsIjogMzUsCiAgICAgICAgICAidHlwZSI6IHsKICAgICAgICAgICAgInR5cGUiOiAiSU5URUdFUiIsCiAgICAgICAgICAgICJudWxsYWJsZSI6IGZhbHNlCiAgICAgICAgICB9CiAgICAgICAgfQogICAgICBdCiAgICB9LAogICAgewogICAgICAibGl0ZXJhbCI6ICJ1MzUiLAogICAgICAidHlwZSI6IHsKICAgICAgICAidHlwZSI6ICJWQVJDSEFSIiwKICAgICAgICAibnVsbGFibGUiOiBmYWxzZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfSwKICAgIHsKICAgICAgImlucHV0IjogMTEsCiAgICAgICJuYW1lIjogIiQxMSIKICAgIH0KICBdCn10AApmaWVsZFR5cGVzc3IAEWphdmEudXRpbC5IYXNoTWFwBQfawcMWYNEDAAJGAApsb2FkRmFjdG9ySQAJdGhyZXNob2xkeHA/QAAAAAAAGHcIAAAAIAAAABN0AAhfcm91dGluZ35yAClvcmcub3BlbnNlYXJjaC5zcWwuZGF0YS50eXBlLkV4cHJDb3JlVHlwZQAA
AAAAAAAAEgAAeHIADmphdmEubGFuZy5FbnVtAAAAAAAAAAASAAB4cHQABlNUUklOR3QADmFjY291bnRfbnVtYmVyfnEAfgAKdAAETE9OR3QACWZpcnN0bmFtZXEAfgAMdAAHYWRkcmVzc3NyADpvcmcub3BlbnNlYXJjaC5zcWwub3BlbnNlYXJjaC5kYXRhLnR5cGUuT3BlblNlYXJjaFRleHRUeXBlrYOjkwTjMUQCAAFMAAZmaWVsZHN0AA9MamF2YS91dGlsL01hcDt4cgA6b3JnLm9wZW5zZWFyY2guc3FsLm9wZW5zZWFyY2guZGF0YS50eXBlLk9wZW5TZWFyY2hEYXRhVHlwZcJjvMoC+gU1AgADTAAMZXhwckNvcmVUeXBldAArTG9yZy9vcGVuc2VhcmNoL3NxbC9kYXRhL3R5cGUvRXhwckNvcmVUeXBlO0wAC21hcHBpbmdUeXBldABITG9yZy9vcGVuc2VhcmNoL3NxbC9vcGVuc2VhcmNoL2RhdGEvdHlwZS9PcGVuU2VhcmNoRGF0YVR5cGUkTWFwcGluZ1R5cGU7TAAKcHJvcGVydGllc3EAfgAUeHB+cQB+AAp0AAdVTktOT1dOfnIARm9yZy5vcGVuc2VhcmNoLnNxbC5vcGVuc2VhcmNoLmRhdGEudHlwZS5PcGVuU2VhcmNoRGF0YVR5cGUkTWFwcGluZ1R5cGUAAAAAAAAAABIAAHhxAH4AC3QABFRleHRzcgA8c2hhZGVkLmNvbS5nb29nbGUuY29tbW9uLmNvbGxlY3QuSW1tdXRhYmxlTWFwJFNlcmlhbGl6ZWRGb3JtAAAAAAAAAAACAAJMAARrZXlzdAASTGphdmEvbGFuZy9PYmplY3Q7TAAGdmFsdWVzcQB+AB94cHVyABNbTGphdmEubGFuZy5PYmplY3Q7kM5YnxBzKWwCAAB4cAAAAAB1cQB+ACEAAAAAc3EAfgAAAAAAA3cEAAAAAHh0AAliaXJ0aGRhdGVzcgA6b3JnLm9wZW5zZWFyY2guc3FsLm9wZW5zZWFyY2guZGF0YS50eXBlLk9wZW5TZWFyY2hEYXRlVHlwZZ4tUq4QfcqvAgABTAAHZm9ybWF0c3QAEExqYXZhL3V0aWwvTGlzdDt4cQB+ABV+cQB+AAp0AAlUSU1FU1RBTVB+cQB+ABt0AAREYXRlcQB+ACBzcQB+AAAAAAABdwQAAAAAeHQABmdlbmRlcnNxAH4AE3EAfgAZcQB+ABxxAH4AIHNxAH4AAAAAAAN3BAAAAAJ0AAdrZXl3b3Jkc3EAfgAVcQB+AAx+cQB+ABt0AAdLZXl3b3JkcQB+ACB4dAAGX2luZGV4cQB+AAx0AARjaXR5cQB+AAx0AAlfbWF4c2NvcmV+cQB+AAp0AAVGTE9BVHQABl9zY29yZXEAfgA4dAAFX3NvcnRxAH4AD3QACGxhc3RuYW1lcQB+AAx0AAdiYWxhbmNlcQB+AA90AAhlbXBsb3llcnNxAH4AE3EAfgAZcQB+ABxxAH4AIHEAfgAkdAAFc3RhdGVzcQB+ABNxAH4AGXEAfgAccQB+ACBzcQB+AAAAAAADdwQAAAACcQB+ADFxAH4AMnh0AANfaWRxAH4ADHQAA2FnZX5xAH4ACnQAB0lOVEVHRVJ0AAVlbWFpbHNxAH4AE3EAfgAZcQB+ABxxAH4AIHEAfgAkdAAEbWFsZX5xAH4ACnQAB0JPT0xFQU54eA==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 
0}},"missing_bucket":true,"missing_order":"first","order":"asc"}}},{"state":{"terms":{"field":"state.keyword","missing_bucket":true,"missing_order":"first","order":"asc"}}}]},"aggregations":{"avg_balance":{"avg":{"field":"balance"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1},avg_balance=AVG($2)), PROJECT->[avg_balance, age_range, state], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"age_range":{"terms":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXNyABFqYXZhLnV0aWwuQ29sbFNlcleOq7Y6G6gRAwABSQADdGFneHAAAAADdwQAAAAGdAAHcm93VHlwZXQA5nsKICAiZmllbGRzIjogWwogICAgewogICAgICAidHlwZSI6ICJJTlRFR0VSIiwKICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgIm5hbWUiOiAiYWdlIgogICAgfSwKICAgIHsKICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICJwcmVjaXNpb24iOiAtMSwKICAgICAgIm5hbWUiOiAiZW1haWwiCiAgICB9CiAgXSwKICAibnVsbGFibGUiOiBmYWxzZQp9dAAEZXhwcnQCe3sKICAib3AiOiB7CiAgICAibmFtZSI6ICJDQVNFIiwKICAgICJraW5kIjogIkNBU0UiLAogICAgInN5bnRheCI6ICJTUEVDSUFMIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAib3AiOiB7CiAgICAgICAgIm5hbWUiOiAiPCIsCiAgICAgICAgImtpbmQiOiAiTEVTU19USEFOIiwKICAgICAgICAic3ludGF4IjogIkJJTkFSWSIKICAgICAgfSwKICAgICAgIm9wZXJhbmRzIjogWwogICAgICAgIHsKICAgICAgICAgICJpbnB1dCI6IDAsCiAgICAgICAgICAibmFtZSI6ICIkMCIKICAgICAgICB9LAogICAgICAgIHsKICAgICAgICAgICJsaXRlcmFsIjogMzUsCiAgICAgICAgICAidHlwZSI6IHsKICAgICAgICAgICAgInR5cGUiOiAiSU5URUdFUiIsCiAgICAgICAgICAgICJudWxsYWJsZSI6IGZhbHNlCiAgICAgICAgICB9CiAgICAgICAgfQogICAgICBdCiAgICB9LAogICAgewogICAgICAibGl0ZXJhbCI6ICJ1MzUiLAogICAgICAidHlwZSI6IHsKICAgICAgICAidHlwZSI6ICJWQVJDSEFSIiwKICAgICAgICAibnVsbGFibGUiOiBmYWxzZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfSwKICAgIHsKICAgICAgImlucHV0IjogMSwKICAg
ICAgIm5hbWUiOiAiJDEiCiAgICB9CiAgXQp9dAAKZmllbGRUeXBlc3NyABFqYXZhLnV0aWwuSGFzaE1hcAUH2sHDFmDRAwACRgAKbG9hZEZhY3RvckkACXRocmVzaG9sZHhwP0AAAAAAAAx3CAAAABAAAAACdAADYWdlfnIAKW9yZy5vcGVuc2VhcmNoLnNxbC5kYXRhLnR5cGUuRXhwckNvcmVUeXBlAAAAAAAAAAASAAB4cgAOamF2YS5sYW5nLkVudW0AAAAAAAAAABIAAHhwdAAHSU5URUdFUnQABWVtYWlsc3IAOm9yZy5vcGVuc2VhcmNoLnNxbC5vcGVuc2VhcmNoLmRhdGEudHlwZS5PcGVuU2VhcmNoVGV4dFR5cGWtg6OTBOMxRAIAAUwABmZpZWxkc3QAD0xqYXZhL3V0aWwvTWFwO3hyADpvcmcub3BlbnNlYXJjaC5zcWwub3BlbnNlYXJjaC5kYXRhLnR5cGUuT3BlblNlYXJjaERhdGFUeXBlwmO8ygL6BTUCAANMAAxleHByQ29yZVR5cGV0ACtMb3JnL29wZW5zZWFyY2gvc3FsL2RhdGEvdHlwZS9FeHByQ29yZVR5cGU7TAALbWFwcGluZ1R5cGV0AEhMb3JnL29wZW5zZWFyY2gvc3FsL29wZW5zZWFyY2gvZGF0YS90eXBlL09wZW5TZWFyY2hEYXRhVHlwZSRNYXBwaW5nVHlwZTtMAApwcm9wZXJ0aWVzcQB+ABB4cH5xAH4ACnQAB1VOS05PV05+cgBGb3JnLm9wZW5zZWFyY2guc3FsLm9wZW5zZWFyY2guZGF0YS50eXBlLk9wZW5TZWFyY2hEYXRhVHlwZSRNYXBwaW5nVHlwZQAAAAAAAAAAEgAAeHEAfgALdAAEVGV4dHNyADxzaGFkZWQuY29tLmdvb2dsZS5jb21tb24uY29sbGVjdC5JbW11dGFibGVNYXAkU2VyaWFsaXplZEZvcm0AAAAAAAAAAAIAAkwABGtleXN0ABJMamF2YS9sYW5nL09iamVjdDtMAAZ2YWx1ZXNxAH4AG3hwdXIAE1tMamF2YS5sYW5nLk9iamVjdDuQzlifEHMpbAIAAHhwAAAAAHVxAH4AHQAAAABzcQB+AAAAAAADdwQAAAAAeHh4\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0}},"missing_bucket":true,"missing_order":"first","order":"asc"}}},{"state":{"terms":{"field":"state.keyword","missing_bucket":true,"missing_order":"first","order":"asc"}}}]},"aggregations":{"avg_balance":{"avg":{"field":"balance"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/agg_case_num_res_cannot_push.yaml b/integ-test/src/test/resources/expectedOutput/calcite/agg_case_num_res_cannot_push.yaml index 7ccbec65eb7..9502c66a448 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/agg_case_num_res_cannot_push.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/agg_case_num_res_cannot_push.yaml @@ -6,4 +6,4 @@ calcite: 
LogicalProject(age_range=[CASE(<($10, 30), 30, 100)]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},count()=COUNT()), PROJECT->[count(), age_range], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"age_range":{"terms":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXNyABFqYXZhLnV0aWwuQ29sbFNlcleOq7Y6G6gRAwABSQADdGFneHAAAAADdwQAAAAGdAAHcm93VHlwZXQHrXsKICAiZmllbGRzIjogWwogICAgewogICAgICAidHlwZSI6ICJCSUdJTlQiLAogICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAibmFtZSI6ICJhY2NvdW50X251bWJlciIKICAgIH0sCiAgICB7CiAgICAgICJ0eXBlIjogIlZBUkNIQVIiLAogICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAicHJlY2lzaW9uIjogLTEsCiAgICAgICJuYW1lIjogImZpcnN0bmFtZSIKICAgIH0sCiAgICB7CiAgICAgICJ0eXBlIjogIlZBUkNIQVIiLAogICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAicHJlY2lzaW9uIjogLTEsCiAgICAgICJuYW1lIjogImFkZHJlc3MiCiAgICB9LAogICAgewogICAgICAidWR0IjogIkVYUFJfVElNRVNUQU1QIiwKICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICJwcmVjaXNpb24iOiAtMSwKICAgICAgIm5hbWUiOiAiYmlydGhkYXRlIgogICAgfSwKICAgIHsKICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICJwcmVjaXNpb24iOiAtMSwKICAgICAgIm5hbWUiOiAiZ2VuZGVyIgogICAgfSwKICAgIHsKICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICJwcmVjaXNpb24iOiAtMSwKICAgICAgIm5hbWUiOiAiY2l0eSIKICAgIH0sCiAgICB7CiAgICAgICJ0eXBlIjogIlZBUkNIQVIiLAogICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAicHJlY2lzaW9uIjogLTEsCiAgICAgICJuYW1lIjogImxhc3RuYW1lIgogICAgfSwKICAgIHsKICAgICAgInR5cGUiOiAiQklHSU5UIiwKICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgIm5hbWUiOiAiYmFsYW5jZSIKICAgIH0sCiAgICB7CiAgICAgICJ0eXBlIjogIlZBUkNIQVIiLAogICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAicHJlY2lzaW9uIjogLTEsCiAgICAgICJuYW1lIjogImVtcGxve
WVyIgogICAgfSwKICAgIHsKICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICJwcmVjaXNpb24iOiAtMSwKICAgICAgIm5hbWUiOiAic3RhdGUiCiAgICB9LAogICAgewogICAgICAidHlwZSI6ICJJTlRFR0VSIiwKICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgIm5hbWUiOiAiYWdlIgogICAgfSwKICAgIHsKICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICJwcmVjaXNpb24iOiAtMSwKICAgICAgIm5hbWUiOiAiZW1haWwiCiAgICB9LAogICAgewogICAgICAidHlwZSI6ICJCT09MRUFOIiwKICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgIm5hbWUiOiAibWFsZSIKICAgIH0sCiAgICB7CiAgICAgICJ0eXBlIjogIlZBUkNIQVIiLAogICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAicHJlY2lzaW9uIjogLTEsCiAgICAgICJuYW1lIjogIl9pZCIKICAgIH0sCiAgICB7CiAgICAgICJ0eXBlIjogIlZBUkNIQVIiLAogICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAicHJlY2lzaW9uIjogLTEsCiAgICAgICJuYW1lIjogIl9pbmRleCIKICAgIH0sCiAgICB7CiAgICAgICJ0eXBlIjogIlJFQUwiLAogICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAibmFtZSI6ICJfc2NvcmUiCiAgICB9LAogICAgewogICAgICAidHlwZSI6ICJSRUFMIiwKICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgIm5hbWUiOiAiX21heHNjb3JlIgogICAgfSwKICAgIHsKICAgICAgInR5cGUiOiAiQklHSU5UIiwKICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgIm5hbWUiOiAiX3NvcnQiCiAgICB9LAogICAgewogICAgICAidHlwZSI6ICJWQVJDSEFSIiwKICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgInByZWNpc2lvbiI6IC0xLAogICAgICAibmFtZSI6ICJfcm91dGluZyIKICAgIH0KICBdLAogICJudWxsYWJsZSI6IHRydWUKfXQABGV4cHJ0Ap97CiAgIm9wIjogewogICAgIm5hbWUiOiAiQ0FTRSIsCiAgICAia2luZCI6ICJDQVNFIiwKICAgICJzeW50YXgiOiAiU1BFQ0lBTCIKICB9LAogICJvcGVyYW5kcyI6IFsKICAgIHsKICAgICAgIm9wIjogewogICAgICAgICJuYW1lIjogIjwiLAogICAgICAgICJraW5kIjogIkxFU1NfVEhBTiIsCiAgICAgICAgInN5bnRheCI6ICJCSU5BUlkiCiAgICAgIH0sCiAgICAgICJvcGVyYW5kcyI6IFsKICAgICAgICB7CiAgICAgICAgICAiaW5wdXQiOiAxMCwKICAgICAgICAgICJuYW1lIjogIiQxMCIKICAgICAgICB9LAogICAgICAgIHsKICAgICAgICAgICJsaXRlcmFsIjogMzAsCiAgICAgICAgICAidHlwZSI6IHsKICAgICAgICAgICAgInR5cGUiOiAiSU5URUdFUiIsCiAgICAgICAgICAgICJudWxsYWJsZSI6IGZhbHNlCiAgICAgICAgICB9CiAgICAgICAgfQogICAgICBdCiAgICB9LAogICAgewogICAgICAibGl0ZXJhbCI6IDMwLAogICAgICAidHlwZSI6IHsKICAgICAgICAidHlwZSI6I
CJJTlRFR0VSIiwKICAgICAgICAibnVsbGFibGUiOiBmYWxzZQogICAgICB9CiAgICB9LAogICAgewogICAgICAibGl0ZXJhbCI6IDEwMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiSU5URUdFUiIsCiAgICAgICAgIm51bGxhYmxlIjogZmFsc2UKICAgICAgfQogICAgfQogIF0KfXQACmZpZWxkVHlwZXNzcgARamF2YS51dGlsLkhhc2hNYXAFB9rBwxZg0QMAAkYACmxvYWRGYWN0b3JJAAl0aHJlc2hvbGR4cD9AAAAAAAAYdwgAAAAgAAAAE3QACF9yb3V0aW5nfnIAKW9yZy5vcGVuc2VhcmNoLnNxbC5kYXRhLnR5cGUuRXhwckNvcmVUeXBlAAAAAAAAAAASAAB4cgAOamF2YS5sYW5nLkVudW0AAAAAAAAAABIAAHhwdAAGU1RSSU5HdAAOYWNjb3VudF9udW1iZXJ+cQB+AAp0AARMT05HdAAJZmlyc3RuYW1lcQB+AAx0AAdhZGRyZXNzc3IAOm9yZy5vcGVuc2VhcmNoLnNxbC5vcGVuc2VhcmNoLmRhdGEudHlwZS5PcGVuU2VhcmNoVGV4dFR5cGWtg6OTBOMxRAIAAUwABmZpZWxkc3QAD0xqYXZhL3V0aWwvTWFwO3hyADpvcmcub3BlbnNlYXJjaC5zcWwub3BlbnNlYXJjaC5kYXRhLnR5cGUuT3BlblNlYXJjaERhdGFUeXBlwmO8ygL6BTUCAANMAAxleHByQ29yZVR5cGV0ACtMb3JnL29wZW5zZWFyY2gvc3FsL2RhdGEvdHlwZS9FeHByQ29yZVR5cGU7TAALbWFwcGluZ1R5cGV0AEhMb3JnL29wZW5zZWFyY2gvc3FsL29wZW5zZWFyY2gvZGF0YS90eXBlL09wZW5TZWFyY2hEYXRhVHlwZSRNYXBwaW5nVHlwZTtMAApwcm9wZXJ0aWVzcQB+ABR4cH5xAH4ACnQAB1VOS05PV05+cgBGb3JnLm9wZW5zZWFyY2guc3FsLm9wZW5zZWFyY2guZGF0YS50eXBlLk9wZW5TZWFyY2hEYXRhVHlwZSRNYXBwaW5nVHlwZQAAAAAAAAAAEgAAeHEAfgALdAAEVGV4dHNyADxzaGFkZWQuY29tLmdvb2dsZS5jb21tb24uY29sbGVjdC5JbW11dGFibGVNYXAkU2VyaWFsaXplZEZvcm0AAAAAAAAAAAIAAkwABGtleXN0ABJMamF2YS9sYW5nL09iamVjdDtMAAZ2YWx1ZXNxAH4AH3hwdXIAE1tMamF2YS5sYW5nLk9iamVjdDuQzlifEHMpbAIAAHhwAAAAAHVxAH4AIQAAAABzcQB+AAAAAAADdwQAAAAAeHQACWJpcnRoZGF0ZXNyADpvcmcub3BlbnNlYXJjaC5zcWwub3BlbnNlYXJjaC5kYXRhLnR5cGUuT3BlblNlYXJjaERhdGVUeXBlni1SrhB9yq8CAAFMAAdmb3JtYXRzdAAQTGphdmEvdXRpbC9MaXN0O3hxAH4AFX5xAH4ACnQACVRJTUVTVEFNUH5xAH4AG3QABERhdGVxAH4AIHNxAH4AAAAAAAF3BAAAAAB4dAAGZ2VuZGVyc3EAfgATcQB+ABlxAH4AHHEAfgAgc3EAfgAAAAAAA3cEAAAAAnQAB2tleXdvcmRzcQB+ABVxAH4ADH5xAH4AG3QAB0tleXdvcmRxAH4AIHh0AAZfaW5kZXhxAH4ADHQABGNpdHlxAH4ADHQACV9tYXhzY29yZX5xAH4ACnQABUZMT0FUdAAGX3Njb3JlcQB+ADh0AAVfc29ydHEAfgAPdAAIbGFzdG5hbWVxAH4ADHQAB2JhbGFuY2VxAH4AD3QACGVtcGxveWVyc3EAfgATcQB+ABlxAH4AHHEAfgAgcQB+ACR0AAVzdGF0ZXNxAH4AE3EAfgAZcQB+ABxxA
H4AIHNxAH4AAAAAAAN3BAAAAAJxAH4AMXEAfgAyeHQAA19pZHEAfgAMdAADYWdlfnEAfgAKdAAHSU5URUdFUnQABWVtYWlsc3EAfgATcQB+ABlxAH4AHHEAfgAgcQB+ACR0AARtYWxlfnEAfgAKdAAHQk9PTEVBTnh4\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0}},"missing_bucket":true,"value_type":"long","missing_order":"first","order":"asc"}}}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},count()=COUNT()), PROJECT->[count(), age_range], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"age_range":{"terms":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXNyABFqYXZhLnV0aWwuQ29sbFNlcleOq7Y6G6gRAwABSQADdGFneHAAAAADdwQAAAAGdAAHcm93VHlwZXQAe3sKICAiZmllbGRzIjogWwogICAgewogICAgICAidHlwZSI6ICJJTlRFR0VSIiwKICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgIm5hbWUiOiAiYWdlIgogICAgfQogIF0sCiAgIm51bGxhYmxlIjogZmFsc2UKfXQABGV4cHJ0Ap17CiAgIm9wIjogewogICAgIm5hbWUiOiAiQ0FTRSIsCiAgICAia2luZCI6ICJDQVNFIiwKICAgICJzeW50YXgiOiAiU1BFQ0lBTCIKICB9LAogICJvcGVyYW5kcyI6IFsKICAgIHsKICAgICAgIm9wIjogewogICAgICAgICJuYW1lIjogIjwiLAogICAgICAgICJraW5kIjogIkxFU1NfVEhBTiIsCiAgICAgICAgInN5bnRheCI6ICJCSU5BUlkiCiAgICAgIH0sCiAgICAgICJvcGVyYW5kcyI6IFsKICAgICAgICB7CiAgICAgICAgICAiaW5wdXQiOiAwLAogICAgICAgICAgIm5hbWUiOiAiJDAiCiAgICAgICAgfSwKICAgICAgICB7CiAgICAgICAgICAibGl0ZXJhbCI6IDMwLAogICAgICAgICAgInR5cGUiOiB7CiAgICAgICAgICAgICJ0eXBlIjogIklOVEVHRVIiLAogICAgICAgICAgICAibnVsbGFibGUiOiBmYWxzZQogICAgICAgICAgfQogICAgICAgIH0KICAgICAgXQogICAgfSwKICAgIHsKICAgICAgImxpdGVyYWwiOiAzMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiSU5URUdFUiIsCiAgICAgICAgIm51bGxhYmxlIjogZmFsc2UKICAgICAgfQogICAgfSwKICAgIHsKICAgICAgImxpdGVyYWwiOiAxMDAsCiAgICAgICJ0eXBlIjogewogICAgICAgICJ0eXBlIjogIklOVEVHRVIiLAogICAgICAgICJudWxsYWJsZSI6IGZhbHNlCiAgICAgIH0KICAgIH0
KICBdCn10AApmaWVsZFR5cGVzc3IAEWphdmEudXRpbC5IYXNoTWFwBQfawcMWYNEDAAJGAApsb2FkRmFjdG9ySQAJdGhyZXNob2xkeHA/QAAAAAAADHcIAAAAEAAAAAF0AANhZ2V+cgApb3JnLm9wZW5zZWFyY2guc3FsLmRhdGEudHlwZS5FeHByQ29yZVR5cGUAAAAAAAAAABIAAHhyAA5qYXZhLmxhbmcuRW51bQAAAAAAAAAAEgAAeHB0AAdJTlRFR0VSeHg=\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0}},"missing_bucket":true,"value_type":"long","missing_order":"first","order":"asc"}}}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])