diff --git a/common/build.gradle b/common/build.gradle index bd7091819c7..43b98db6d80 100644 --- a/common/build.gradle +++ b/common/build.gradle @@ -45,7 +45,7 @@ dependencies { implementation "com.github.seancfoley:ipaddress:5.4.2" testImplementation group: 'junit', name: 'junit', version: '4.13.2' - testImplementation group: 'org.assertj', name: 'assertj-core', version: '3.9.1' + testImplementation group: 'org.assertj', name: 'assertj-core', version: '3.27.7' testImplementation group: 'com.google.guava', name: 'guava', version: "${guava_version}" testImplementation group: 'org.hamcrest', name: 'hamcrest-library', version: "${hamcrest_version}" testImplementation('org.junit.jupiter:junit-jupiter:5.9.3') diff --git a/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java b/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java index 65056aecbad..50152147963 100644 --- a/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java +++ b/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java @@ -80,6 +80,7 @@ import org.opensearch.sql.ast.tree.Lookup; import org.opensearch.sql.ast.tree.ML; import org.opensearch.sql.ast.tree.Multisearch; +import org.opensearch.sql.ast.tree.MvCombine; import org.opensearch.sql.ast.tree.Paginate; import org.opensearch.sql.ast.tree.Parse; import org.opensearch.sql.ast.tree.Patterns; @@ -99,6 +100,7 @@ import org.opensearch.sql.ast.tree.StreamWindow; import org.opensearch.sql.ast.tree.SubqueryAlias; import org.opensearch.sql.ast.tree.TableFunction; +import org.opensearch.sql.ast.tree.Transpose; import org.opensearch.sql.ast.tree.Trendline; import org.opensearch.sql.ast.tree.UnresolvedPlan; import org.opensearch.sql.ast.tree.Values; @@ -524,6 +526,12 @@ public LogicalPlan visitEval(Eval node, AnalysisContext context) { return new LogicalEval(child, expressionsBuilder.build()); } + /** {@code fieldformat} is an alias of {@code eval} and is only supported by the Calcite engine. */ + @Override + public LogicalPlan visitFieldFormat(Eval node, AnalysisContext context) { + throw getOnlyForCalciteException("fieldformat"); + } + @Override public LogicalPlan visitAddTotals(AddTotals node, AnalysisContext context) { throw getOnlyForCalciteException("addtotals"); @@ -534,6 +542,11 @@ public LogicalPlan visitAddColTotals(AddColTotals node, AnalysisContext context) throw getOnlyForCalciteException("addcoltotals"); } + @Override + public LogicalPlan visitMvCombine(MvCombine node, AnalysisContext context) { + throw getOnlyForCalciteException("mvcombine"); + } + /** Build {@link ParseExpression} to context and skip to child nodes.
*/ @Override public LogicalPlan visitParse(Parse node, AnalysisContext context) { @@ -704,6 +717,11 @@ public LogicalPlan visitML(ML node, AnalysisContext context) { return new LogicalML(child, node.getArguments()); } + @Override + public LogicalPlan visitTranspose(Transpose node, AnalysisContext context) { + throw getOnlyForCalciteException("Transpose"); + } + @Override public LogicalPlan visitBin(Bin node, AnalysisContext context) { throw getOnlyForCalciteException("Bin"); diff --git a/core/src/main/java/org/opensearch/sql/ast/AbstractNodeVisitor.java b/core/src/main/java/org/opensearch/sql/ast/AbstractNodeVisitor.java index a6ef5e7547a..2486b63791d 100644 --- a/core/src/main/java/org/opensearch/sql/ast/AbstractNodeVisitor.java +++ b/core/src/main/java/org/opensearch/sql/ast/AbstractNodeVisitor.java @@ -68,6 +68,7 @@ import org.opensearch.sql.ast.tree.Lookup; import org.opensearch.sql.ast.tree.ML; import org.opensearch.sql.ast.tree.Multisearch; +import org.opensearch.sql.ast.tree.MvCombine; import org.opensearch.sql.ast.tree.Paginate; import org.opensearch.sql.ast.tree.Parse; import org.opensearch.sql.ast.tree.Patterns; @@ -86,6 +87,7 @@ import org.opensearch.sql.ast.tree.StreamWindow; import org.opensearch.sql.ast.tree.SubqueryAlias; import org.opensearch.sql.ast.tree.TableFunction; +import org.opensearch.sql.ast.tree.Transpose; import org.opensearch.sql.ast.tree.Trendline; import org.opensearch.sql.ast.tree.Values; import org.opensearch.sql.ast.tree.Window; @@ -262,6 +264,10 @@ public T visitEval(Eval node, C context) { return visitChildren(node, context); } + public T visitFieldFormat(Eval node, C context) { + return visitChildren(node, context); + } + public T visitParse(Parse node, C context) { return visitChildren(node, context); } @@ -282,6 +288,10 @@ public T visitReverse(Reverse node, C context) { return visitChildren(node, context); } + public T visitTranspose(Transpose node, C context) { + return visitChildren(node, context); + } + public T visitChart(Chart node, C context) { return visitChildren(node, context); } @@ -461,4 +471,8 @@ public T visitAddTotals(AddTotals node, C context) { public T visitAddColTotals(AddColTotals node, C context) { return visitChildren(node, context); } + + public T visitMvCombine(MvCombine node, C context) { + return visitChildren(node, context); + } } diff --git a/core/src/main/java/org/opensearch/sql/ast/analysis/FieldResolutionVisitor.java b/core/src/main/java/org/opensearch/sql/ast/analysis/FieldResolutionVisitor.java index 5d51efd7188..0b2e05907b4 100644 --- a/core/src/main/java/org/opensearch/sql/ast/analysis/FieldResolutionVisitor.java +++ b/core/src/main/java/org/opensearch/sql/ast/analysis/FieldResolutionVisitor.java @@ -44,6 +44,7 @@ import org.opensearch.sql.ast.tree.Join; import org.opensearch.sql.ast.tree.Lookup; import org.opensearch.sql.ast.tree.Multisearch; +import org.opensearch.sql.ast.tree.MvCombine; import org.opensearch.sql.ast.tree.Parse; import org.opensearch.sql.ast.tree.Patterns; import org.opensearch.sql.ast.tree.Project; @@ -59,6 +60,7 @@ import org.opensearch.sql.ast.tree.Sort; import org.opensearch.sql.ast.tree.StreamWindow; import org.opensearch.sql.ast.tree.SubqueryAlias; +import org.opensearch.sql.ast.tree.Transpose; import org.opensearch.sql.ast.tree.Trendline; import org.opensearch.sql.ast.tree.UnresolvedPlan; import org.opensearch.sql.ast.tree.Values; @@ -349,12 +351,6 @@ public Node visitSearch(Search node, FieldResolutionContext context) { return node; } - @Override - public Node visitAppendPipe(AppendPipe 
node, FieldResolutionContext context) { - visitChildren(node, context); - return node; - } - @Override public Node visitRegex(Regex node, FieldResolutionContext context) { Set regexFields = extractFieldsFromExpression(node.getField()); @@ -507,8 +503,10 @@ public Node visitFillNull(FillNull node, FieldResolutionContext context) { @Override public Node visitAppendCol(AppendCol node, FieldResolutionContext context) { - throw new IllegalArgumentException( - "AppendCol command cannot be used together with spath command"); + // dispatch requirements to subsearch and main + acceptAndVerifyNodeVisited(node.getSubSearch(), context); + visitChildren(node, context); + return node; } @Override @@ -520,9 +518,10 @@ public Node visitAppend(Append node, FieldResolutionContext context) { } @Override - public Node visitMultisearch(Multisearch node, FieldResolutionContext context) { - throw new IllegalArgumentException( - "Multisearch command cannot be used together with spath command"); + public Node visitAppendPipe(AppendPipe node, FieldResolutionContext context) { + acceptAndVerifyNodeVisited(node.getSubQuery(), context); + visitChildren(node, context); + return node; } @Override @@ -532,7 +531,16 @@ public Node visitLookup(Lookup node, FieldResolutionContext context) { @Override public Node visitValues(Values node, FieldResolutionContext context) { - throw new IllegalArgumentException("Values command cannot be used together with spath command"); + // do nothing + return node; + } + + @Override + public Node visitMultisearch(Multisearch node, FieldResolutionContext context) { + // dispatch requirements to subsearches and main + node.getSubsearches().forEach(subsearch -> acceptAndVerifyNodeVisited(subsearch, context)); + visitChildren(node, context); + return node; } @Override @@ -570,6 +578,12 @@ public Node visitTrendline(Trendline node, FieldResolutionContext context) { return node; } + @Override + public Node visitTranspose(Transpose node, FieldResolutionContext context) { + visitChildren(node, context); + return node; + } + @Override public Node visitChart(Chart node, FieldResolutionContext context) { Set chartFields = extractFieldsFromAggregation(node.getAggregationFunction()); @@ -612,6 +626,12 @@ public Node visitAddColTotals(AddColTotals node, FieldResolutionContext context) return node; } + @Override + public Node visitFieldFormat(Eval node, FieldResolutionContext context) { + visitChildren(node, context); + return node; + } + @Override public Node visitExpand(Expand node, FieldResolutionContext context) { Set expandFields = extractFieldsFromExpression(node.getField()); @@ -621,6 +641,22 @@ public Node visitExpand(Expand node, FieldResolutionContext context) { return node; } + @Override + public Node visitMvCombine(MvCombine node, FieldResolutionContext context) { + Set mvCombineFields = extractFieldsFromExpression(node.getField()); + + FieldResolutionResult current = context.getCurrentRequirements(); + + Set regularFields = new HashSet<>(current.getRegularFields()); + regularFields.addAll(mvCombineFields); + + context.pushRequirements(new FieldResolutionResult(regularFields, Set.of(ALL_FIELDS))); + + visitChildren(node, context); + context.popRequirements(); + return node; + } + private Set extractFieldsFromAggregation(UnresolvedExpression expr) { Set fields = new HashSet<>(); if (expr instanceof Alias alias) { diff --git a/core/src/main/java/org/opensearch/sql/ast/dsl/AstDSL.java b/core/src/main/java/org/opensearch/sql/ast/dsl/AstDSL.java index bf54d2ffd89..8b129c6267a 100644 --- 
a/core/src/main/java/org/opensearch/sql/ast/dsl/AstDSL.java +++ b/core/src/main/java/org/opensearch/sql/ast/dsl/AstDSL.java @@ -62,6 +62,7 @@ import org.opensearch.sql.ast.tree.Head; import org.opensearch.sql.ast.tree.Limit; import org.opensearch.sql.ast.tree.MinSpanBin; +import org.opensearch.sql.ast.tree.MvCombine; import org.opensearch.sql.ast.tree.Parse; import org.opensearch.sql.ast.tree.Patterns; import org.opensearch.sql.ast.tree.Project; @@ -468,6 +469,14 @@ public static List defaultDedupArgs() { argument("consecutive", booleanLiteral(false))); } + public static MvCombine mvcombine(Field field) { + return new MvCombine(field, null); + } + + public static MvCombine mvcombine(Field field, String delim) { + return new MvCombine(field, delim); + } + public static List sortOptions() { return exprList(argument("desc", booleanLiteral(false))); } diff --git a/core/src/main/java/org/opensearch/sql/ast/expression/Let.java b/core/src/main/java/org/opensearch/sql/ast/expression/Let.java index ad9843fae11..abd6733e06f 100644 --- a/core/src/main/java/org/opensearch/sql/ast/expression/Let.java +++ b/core/src/main/java/org/opensearch/sql/ast/expression/Let.java @@ -20,6 +20,8 @@ public class Let extends UnresolvedExpression { private final Field var; private final UnresolvedExpression expression; + private final Literal concatPrefix; + private final Literal concatSuffix; public Let(Field var, UnresolvedExpression expression) { String varName = var.getField().toString(); @@ -29,6 +31,21 @@ public Let(Field var, UnresolvedExpression expression) { } this.var = var; this.expression = expression; + this.concatPrefix = null; + this.concatSuffix = null; + } + + public Let( + Field var, UnresolvedExpression expression, Literal concatPrefix, Literal concatSuffix) { + String varName = var.getField().toString(); + if (OpenSearchConstants.METADATAFIELD_TYPE_MAP.containsKey(varName)) { + throw new IllegalArgumentException( + String.format("Cannot use metadata field [%s] as the eval field.", varName)); + } + this.var = var; + this.expression = expression; + this.concatPrefix = concatPrefix; + this.concatSuffix = concatSuffix; } @Override diff --git a/core/src/main/java/org/opensearch/sql/ast/tree/MvCombine.java b/core/src/main/java/org/opensearch/sql/ast/tree/MvCombine.java new file mode 100644 index 00000000000..ba94aa10978 --- /dev/null +++ b/core/src/main/java/org/opensearch/sql/ast/tree/MvCombine.java @@ -0,0 +1,45 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.ast.tree; + +import com.google.common.collect.ImmutableList; +import java.util.List; +import javax.annotation.Nullable; +import lombok.EqualsAndHashCode; +import lombok.Getter; +import lombok.ToString; +import org.opensearch.sql.ast.AbstractNodeVisitor; +import org.opensearch.sql.ast.expression.Field; + +@Getter +@ToString(callSuper = true) +@EqualsAndHashCode(callSuper = false) +public class MvCombine extends UnresolvedPlan { + + private final Field field; + private final String delim; + @Nullable private UnresolvedPlan child; + + public MvCombine(Field field, @Nullable String delim) { + this.field = field; + this.delim = (delim == null) ? " " : delim; + } + + public MvCombine attach(UnresolvedPlan child) { + this.child = child; + return this; + } + + @Override + public List getChild() { + return child == null ? 
ImmutableList.of() : ImmutableList.of(child); + } + + @Override + public T accept(AbstractNodeVisitor nodeVisitor, C context) { + return nodeVisitor.visitMvCombine(this, context); + } +} diff --git a/core/src/main/java/org/opensearch/sql/ast/tree/Transpose.java b/core/src/main/java/org/opensearch/sql/ast/tree/Transpose.java new file mode 100644 index 00000000000..cb74692c8d1 --- /dev/null +++ b/core/src/main/java/org/opensearch/sql/ast/tree/Transpose.java @@ -0,0 +1,68 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.ast.tree; + +import com.google.common.collect.ImmutableList; +import java.util.List; +import lombok.*; +import org.opensearch.sql.ast.AbstractNodeVisitor; +import org.opensearch.sql.ast.expression.Argument; +import org.opensearch.sql.common.utils.StringUtils; + +/** AST node represent Transpose operation. */ +@Getter +@Setter +@ToString +@EqualsAndHashCode(callSuper = false) +public class Transpose extends UnresolvedPlan { + private final @NonNull java.util.Map arguments; + private UnresolvedPlan child; + private static final int MAX_LIMIT_TRANSPOSE = 10000; + private static final int DEFAULT_MAX_ROWS = 5; + private static final String DEFAULT_COLUMN_NAME = "column"; + private final int maxRows; + private final String columnName; + + public Transpose(java.util.Map arguments) { + + this.arguments = arguments; + int tempMaxRows = DEFAULT_MAX_ROWS; + if (arguments.containsKey("number") && arguments.get("number").getValue() != null) { + try { + tempMaxRows = Integer.parseInt(arguments.get("number").getValue().toString()); + } catch (NumberFormatException e) { + // log warning and use default + + } + } + maxRows = tempMaxRows; + if (maxRows > MAX_LIMIT_TRANSPOSE) { + throw new IllegalArgumentException( + StringUtils.format("Maximum limit to transpose is %s", MAX_LIMIT_TRANSPOSE)); + } + if (arguments.containsKey("columnName") && arguments.get("columnName").getValue() != null) { + columnName = arguments.get("columnName").getValue().toString(); + } else { + columnName = DEFAULT_COLUMN_NAME; + } + } + + @Override + public Transpose attach(UnresolvedPlan child) { + this.child = child; + return this; + } + + @Override + public List getChild() { + return this.child == null ? 
ImmutableList.of() : ImmutableList.of(this.child); + } + + @Override + public T accept(AbstractNodeVisitor nodeVisitor, C context) { + return nodeVisitor.visitTranspose(this, context); + } +} diff --git a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java index d64cc205cd0..5825011f653 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java +++ b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java @@ -54,6 +54,7 @@ import org.apache.calcite.rel.type.RelDataType; import org.apache.calcite.rel.type.RelDataTypeFamily; import org.apache.calcite.rel.type.RelDataTypeField; +import org.apache.calcite.rex.RexBuilder; import org.apache.calcite.rex.RexCall; import org.apache.calcite.rex.RexCorrelVariable; import org.apache.calcite.rex.RexInputRef; @@ -62,7 +63,9 @@ import org.apache.calcite.rex.RexVisitorImpl; import org.apache.calcite.rex.RexWindowBounds; import org.apache.calcite.sql.SqlKind; +import org.apache.calcite.sql.fun.SqlLibraryOperators; import org.apache.calcite.sql.fun.SqlStdOperatorTable; +import org.apache.calcite.sql.fun.SqlTrimFunction; import org.apache.calcite.sql.type.ArraySqlType; import org.apache.calcite.sql.type.MapSqlType; import org.apache.calcite.sql.type.SqlTypeFamily; @@ -122,6 +125,7 @@ import org.opensearch.sql.ast.tree.Lookup.OutputStrategy; import org.opensearch.sql.ast.tree.ML; import org.opensearch.sql.ast.tree.Multisearch; +import org.opensearch.sql.ast.tree.MvCombine; import org.opensearch.sql.ast.tree.Paginate; import org.opensearch.sql.ast.tree.Parse; import org.opensearch.sql.ast.tree.Patterns; @@ -699,6 +703,76 @@ public RelNode visitReverse( return context.relBuilder.peek(); } + @Override + public RelNode visitTranspose( + org.opensearch.sql.ast.tree.Transpose node, CalcitePlanContext context) { + + visitChildren(node, context); + + int maxRows = + Optional.ofNullable(node.getMaxRows()) + .filter(r -> r > 0) + .orElseThrow(() -> new IllegalArgumentException("maxRows must be positive")); + + String columnName = node.getColumnName(); + List fieldNames = + context.relBuilder.peek().getRowType().getFieldNames().stream() + .filter(fieldName -> !isMetadataField(fieldName)) + .toList(); + + RelBuilder b = context.relBuilder; + RexBuilder rx = context.rexBuilder; + RelDataType varchar = rx.getTypeFactory().createSqlType(SqlTypeName.VARCHAR); + + // Step 1: ROW_NUMBER + b.projectPlus( + b.aggregateCall(SqlStdOperatorTable.ROW_NUMBER) + .over() + .rowsTo(RexWindowBounds.CURRENT_ROW) + .as(PlanUtils.ROW_NUMBER_COLUMN_FOR_TRANSPOSE)); + + // Step 2: UNPIVOT + b.unpivot( + false, + ImmutableList.of("value"), + ImmutableList.of(columnName), + fieldNames.stream() + .map( + f -> + Map.entry( + ImmutableList.of(rx.makeLiteral(f)), + ImmutableList.of((RexNode) rx.makeCast(varchar, b.field(f), true)))) + .collect(Collectors.toList())); + + // Step 3: Trim spaces from columnName column before pivot + + RexNode trimmedColumnName = + context.rexBuilder.makeCall( + SqlStdOperatorTable.TRIM, + context.rexBuilder.makeFlag(SqlTrimFunction.Flag.BOTH), + context.rexBuilder.makeLiteral(" "), + b.field(columnName)); + + // Step 4: PIVOT + b.pivot( + b.groupKey(trimmedColumnName), + ImmutableList.of(b.max(b.field("value"))), + ImmutableList.of(b.field(PlanUtils.ROW_NUMBER_COLUMN_FOR_TRANSPOSE)), + IntStream.rangeClosed(1, maxRows) + .mapToObj(i -> Map.entry("row " + i, ImmutableList.of((RexNode) b.literal(i)))) + 
.collect(Collectors.toList())); + + // Step 4: RENAME + List cleanNames = new ArrayList<>(); + cleanNames.add(columnName); + for (int i = 1; i <= maxRows; i++) { + cleanNames.add("row " + i); + } + b.rename(cleanNames); + + return b.peek(); + } + @Override public RelNode visitBin(Bin node, CalcitePlanContext context) { visitChildren(node, context); @@ -2232,6 +2306,7 @@ public RelNode visitMultisearch(Multisearch node, CalcitePlanContext context) { prunedSubSearch.accept(this, context); subsearchNodes.add(context.relBuilder.build()); } + subsearchNodes = DynamicFieldsHelper.adjustInputsForDynamicFields(subsearchNodes, context); // Use shared schema merging logic that handles type conflicts via field renaming List alignedNodes = @@ -3096,6 +3171,174 @@ public RelNode visitExpand(Expand expand, CalcitePlanContext context) { return context.relBuilder.peek(); } + /** + * mvcombine command visitor to collapse rows that are identical on all non-target fields. + * + *

<p>Grouping semantics: + * + * <ul> + *   <li>The target field is always excluded from the GROUP BY keys. + *   <li>Metadata fields (for example {@code _id}, {@code _index}, {@code _score}) are excluded + *       from GROUP BY keys unless they were explicitly projected earlier (for example, via {@code fields}). + * </ul> + * + * <p>The target field values are aggregated using {@code ARRAY_AGG}, with {@code NULL} values + * filtered out. The aggregation result replaces the original target column and produces an {@code ARRAY} output. + * + * <p>
The original output column order is preserved. Metadata fields are projected as typed {@code + * NULL} literals after aggregation only when they are not part of grouping (since they were + * skipped). + * + * @param node mvcombine command to be visited + * @param context CalcitePlanContext containing the RelBuilder and planning context + * @return RelNode representing collapsed records with the target combined into a multivalue array + * @throws SemanticCheckException if the mvcombine target is not a direct field reference + */ + @Override + public RelNode visitMvCombine(MvCombine node, CalcitePlanContext context) { + // 1) Lower the child plan first so the RelBuilder has the input schema on the stack. + visitChildren(node, context); + + final RelBuilder relBuilder = context.relBuilder; + + final RelNode input = relBuilder.peek(); + final List inputFieldNames = input.getRowType().getFieldNames(); + final List inputFieldTypes = + input.getRowType().getFieldList().stream().map(RelDataTypeField::getType).toList(); + + // If true, we should NOT auto-skip meta fields (because user explicitly projected them) + final boolean includeMetaFields = context.isProjectVisited(); + + // 2) Resolve the mvcombine target to an input column index (must be a direct field reference). + final Field targetField = node.getField(); + final int targetIndex = resolveTargetIndex(targetField, context); + final String targetName = inputFieldNames.get(targetIndex); + + // 3) Group by all non-target fields, skipping meta fields unless explicitly projected. + final List groupExprs = + buildGroupExpressionsExcludingTarget( + targetIndex, inputFieldNames, relBuilder, includeMetaFields); + + // 4) Aggregate target values using ARRAY_AGG, filtering out NULLs. + performArrayAggAggregation(relBuilder, targetIndex, targetName, groupExprs); + + // 5) Restore original output column order (ARRAY_AGG already returns ARRAY). + restoreColumnOrderAfterArrayAgg( + relBuilder, inputFieldNames, inputFieldTypes, targetIndex, groupExprs, includeMetaFields); + + return relBuilder.peek(); + } + + /** Resolves the mvcombine target expression to an input field index. */ + private int resolveTargetIndex(Field targetField, CalcitePlanContext context) { + final RexNode targetRex; + try { + targetRex = rexVisitor.analyze(targetField, context); + } catch (IllegalArgumentException e) { + // Make missing-field behavior deterministic (and consistently mapped to 4xx) + // instead of leaking RelBuilder/rexVisitor exception wording. + throw new SemanticCheckException( + "mvcombine target field not found: " + targetField.getField().toString(), e); + } + + if (!isInputRef(targetRex)) { + throw new SemanticCheckException( + "mvcombine target must be a direct field reference, but got: " + targetField); + } + + final int index = ((RexInputRef) targetRex).getIndex(); + + final RelDataType fieldType = + context.relBuilder.peek().getRowType().getFieldList().get(index).getType(); + + if (SqlTypeUtil.isArray(fieldType) || SqlTypeUtil.isMultiset(fieldType)) { + throw new SemanticCheckException( + "mvcombine target cannot be an array/multivalue type, but got: " + fieldType); + } + + return index; + } + + /** + * Builds group-by expressions for mvcombine: all non-target input fields; meta fields are skipped + * unless includeMetaFields is true. 
+ */ + private List buildGroupExpressionsExcludingTarget( + int targetIndex, + List inputFieldNames, + RelBuilder relBuilder, + boolean includeMetaFields) { + + final List groupExprs = new ArrayList<>(Math.max(0, inputFieldNames.size() - 1)); + for (int i = 0; i < inputFieldNames.size(); i++) { + if (i == targetIndex) { + continue; + } + if (isMetadataField(inputFieldNames.get(i)) && !includeMetaFields) { + continue; + } + groupExprs.add(relBuilder.field(i)); + } + return groupExprs; + } + + /** Applies mvcombine aggregation. */ + private void performArrayAggAggregation( + RelBuilder relBuilder, int targetIndex, String targetName, List groupExprs) { + + final RexNode targetRef = relBuilder.field(targetIndex); + final RexNode notNullTarget = relBuilder.isNotNull(targetRef); + + final RelBuilder.AggCall aggCall = + relBuilder + .aggregateCall(SqlLibraryOperators.ARRAY_AGG, targetRef) + .filter(notNullTarget) + .as(targetName); + + relBuilder.aggregate(relBuilder.groupKey(groupExprs), aggCall); + } + + /** + * Restores the original output column order after the aggregate step. Meta fields are set to + * typed NULL only when they were skipped from grouping (includeMetaFields=false). + */ + private void restoreColumnOrderAfterArrayAgg( + RelBuilder relBuilder, + List inputFieldNames, + List inputFieldTypes, + int targetIndex, + List groupExprs, + boolean includeMetaFields) { + + final int aggregatedTargetPos = groupExprs.size(); + + final List projections = new ArrayList<>(inputFieldNames.size()); + final List projectionNames = new ArrayList<>(inputFieldNames.size()); + + int groupPos = 0; + for (int i = 0; i < inputFieldNames.size(); i++) { + final String fieldName = inputFieldNames.get(i); + projectionNames.add(fieldName); + + if (i == targetIndex) { + // aggregated target is always the last field in the aggregate output + projections.add(relBuilder.field(aggregatedTargetPos)); + } else if (isMetadataField(fieldName) && !includeMetaFields) { + // meta fields were skipped from grouping => not present in aggregate output => keep schema + // stable + projections.add(relBuilder.getRexBuilder().makeNullLiteral(inputFieldTypes.get(i))); + } else { + // grouped field (including meta fields when includeMetaFields=true) + projections.add(relBuilder.field(groupPos)); + groupPos++; + } + } + + relBuilder.project(projections, projectionNames, /* force= */ true); + } + @Override public RelNode visitValues(Values values, CalcitePlanContext context) { if (values.getValues() == null || values.getValues().isEmpty()) { diff --git a/core/src/main/java/org/opensearch/sql/calcite/CalciteRexNodeVisitor.java b/core/src/main/java/org/opensearch/sql/calcite/CalciteRexNodeVisitor.java index 9354bcc3329..04ddd4b7671 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/CalciteRexNodeVisitor.java +++ b/core/src/main/java/org/opensearch/sql/calcite/CalciteRexNodeVisitor.java @@ -321,6 +321,28 @@ public RexNode visitLambdaFunction(LambdaFunction node, CalcitePlanContext conte @Override public RexNode visitLet(Let node, CalcitePlanContext context) { RexNode expr = analyze(node.getExpression(), context); + if (node.getConcatPrefix() != null) { + + expr = + context.rexBuilder.makeCall( + SqlStdOperatorTable.CONCAT, + context.rexBuilder.makeLiteral( + node.getConcatPrefix().getValue(), + context.rexBuilder.getTypeFactory().createSqlType(SqlTypeName.VARCHAR), + true), + expr); + } + if (node.getConcatSuffix() != null) { + + expr = + context.rexBuilder.makeCall( + SqlStdOperatorTable.CONCAT, + expr, + 
context.rexBuilder.makeLiteral( + node.getConcatSuffix().getValue(), + context.rexBuilder.getTypeFactory().createSqlType(SqlTypeName.VARCHAR), + true)); + } return context.relBuilder.alias(expr, node.getVar().getField().toString()); } diff --git a/core/src/main/java/org/opensearch/sql/calcite/DynamicFieldsHelper.java b/core/src/main/java/org/opensearch/sql/calcite/DynamicFieldsHelper.java index a8073772a95..05eb48234e7 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/DynamicFieldsHelper.java +++ b/core/src/main/java/org/opensearch/sql/calcite/DynamicFieldsHelper.java @@ -10,6 +10,7 @@ import java.util.ArrayList; import java.util.Collection; import java.util.Collections; +import java.util.HashSet; import java.util.List; import java.util.Optional; import java.util.Set; @@ -99,8 +100,9 @@ static void adjustJoinInputsForDynamicFields( // build once to modify the inputs already in the stack. RelNode right = context.relBuilder.build(); RelNode left = context.relBuilder.build(); - left = adjustFieldsForDynamicFields(left, right, context); - right = adjustFieldsForDynamicFields(right, left, context); + List inputs = adjustInputsForDynamicFields(List.of(right, left), context); + right = inputs.get(0); + left = inputs.get(1); context.relBuilder.push(left); // `as(alias)` is needed since `build()` won't preserve alias leftAlias.map(alias -> context.relBuilder.as(alias)); @@ -119,6 +121,36 @@ static RelNode adjustFieldsForDynamicFields( return target; } + /** Adjust inputs to align the static/dynamic fields each other */ + static List adjustInputsForDynamicFields( + List inputs, CalcitePlanContext context) { + boolean requireAdjustment = inputs.stream().anyMatch(input -> hasDynamicFields(input)); + if (requireAdjustment) { + List requiredStaticFields = getRequiredStaticFields(inputs); + return inputs.stream() + .map(input -> adjustFieldsForDynamicFields(input, requiredStaticFields, context)) + .collect(Collectors.toList()); + } else { + return inputs; + } + } + + static List getRequiredStaticFields(List inputs) { + Set requiredStaticFields = new HashSet(); + for (RelNode input : inputs) { + if (hasDynamicFields(input)) { + requiredStaticFields.addAll(getStaticFields(input)); + } + } + return toSortedList(requiredStaticFields); + } + + private static List toSortedList(Collection collection) { + ArrayList result = new ArrayList<>(collection); + Collections.sort(result); + return result; + } + /** * Project node's fields in `requiredFieldNames` as static field, and put other fields into `_MAP` * (dynamic fields) This projection is needed when merging an input with dynamic fields and an @@ -128,16 +160,27 @@ static RelNode adjustFieldsForDynamicFields( static RelNode adjustFieldsForDynamicFields( RelNode node, List staticFieldNames, CalcitePlanContext context) { context.relBuilder.push(node); - List existingFields = node.getRowType().getFieldNames(); + List existingFields = getStaticFields(node); List project = new ArrayList<>(); for (String existingField : existingFields) { if (staticFieldNames.contains(existingField)) { project.add(context.rexBuilder.makeInputRef(node, existingFields.indexOf(existingField))); } } - project.add( - context.relBuilder.alias( - getFieldsAsMap(existingFields, staticFieldNames, context), DYNAMIC_FIELDS_MAP)); + if (hasDynamicFields(node)) { + // _MAP = MAP_APPEND(_MAP, MAP(existingFields - staticFields)) + RexNode existingDynamicFieldsMap = context.relBuilder.field(DYNAMIC_FIELDS_MAP); + RexNode additionalFieldsMap = getFieldsAsMap(existingFields, staticFieldNames, 
context); + RexNode mapAppend = + context.rexBuilder.makeCall( + BuiltinFunctionName.MAP_APPEND, existingDynamicFieldsMap, additionalFieldsMap); + project.add(context.relBuilder.alias(mapAppend, DYNAMIC_FIELDS_MAP)); + } else { + // _MAP = MAP(existingFields - staticFields) + project.add( + context.relBuilder.alias( + getFieldsAsMap(existingFields, staticFieldNames, context), DYNAMIC_FIELDS_MAP)); + } return context.relBuilder.project(project).build(); } diff --git a/core/src/main/java/org/opensearch/sql/calcite/utils/PlanUtils.java b/core/src/main/java/org/opensearch/sql/calcite/utils/PlanUtils.java index ec0eed5a97d..b4e040762af 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/utils/PlanUtils.java +++ b/core/src/main/java/org/opensearch/sql/calcite/utils/PlanUtils.java @@ -80,6 +80,7 @@ public interface PlanUtils { String ROW_NUMBER_COLUMN_FOR_SUBSEARCH = "_row_number_subsearch_"; String ROW_NUMBER_COLUMN_FOR_STREAMSTATS = "__stream_seq__"; String ROW_NUMBER_COLUMN_FOR_CHART = "_row_number_chart_"; + String ROW_NUMBER_COLUMN_FOR_TRANSPOSE = "_row_number_transpose_"; static SpanUnit intervalUnitToSpanUnit(IntervalUnit unit) { return switch (unit) { diff --git a/direct-query-core/src/main/java/org/opensearch/sql/prometheus/client/PrometheusClientImpl.java b/direct-query-core/src/main/java/org/opensearch/sql/prometheus/client/PrometheusClientImpl.java index 687274fab38..bb5eaf153f7 100644 --- a/direct-query-core/src/main/java/org/opensearch/sql/prometheus/client/PrometheusClientImpl.java +++ b/direct-query-core/src/main/java/org/opensearch/sql/prometheus/client/PrometheusClientImpl.java @@ -26,6 +26,7 @@ import org.json.JSONArray; import org.json.JSONException; import org.json.JSONObject; +import org.opensearch.secure_sm.AccessController; import org.opensearch.sql.prometheus.exception.PrometheusClientException; import org.opensearch.sql.prometheus.model.MetricMetadata; @@ -91,7 +92,7 @@ public JSONObject queryRange( Request request = new Request.Builder().url(queryUrl).build(); logger.debug("Executing Prometheus request with headers: {}", request.headers().toString()); - Response response = this.prometheusHttpClient.newCall(request).execute(); + Response response = AccessController.doPrivilegedChecked(() -> this.prometheusHttpClient.newCall(request).execute()); logger.debug("Received Prometheus response for query_range: code={}", response); @@ -126,7 +127,7 @@ public JSONObject query(String query, Long time, Integer limit, Integer timeout) Request request = new Request.Builder().url(queryUrl).build(); logger.info("Executing Prometheus request with headers: {}", request.headers().toString()); - Response response = this.prometheusHttpClient.newCall(request).execute(); + Response response = AccessController.doPrivilegedChecked(() -> this.prometheusHttpClient.newCall(request).execute()); logger.info("Received Prometheus response for instant query: code={}", response); // Return the full response object, not just the data field @@ -146,7 +147,7 @@ public List getLabels(Map queryParams) throws IOExceptio "%s/api/v1/labels%s", prometheusUri.toString().replaceAll("/$", ""), queryString); logger.debug("queryUrl: " + queryUrl); Request request = new Request.Builder().url(queryUrl).build(); - Response response = this.prometheusHttpClient.newCall(request).execute(); + Response response = AccessController.doPrivilegedChecked(() -> this.prometheusHttpClient.newCall(request).execute()); JSONObject jsonObject = readResponse(response); return toListOfLabels(jsonObject.getJSONArray("data")); } @@ 
-161,7 +162,7 @@ public List getLabel(String labelName, Map queryParams) prometheusUri.toString().replaceAll("/$", ""), labelName, queryString); logger.debug("queryUrl: " + queryUrl); Request request = new Request.Builder().url(queryUrl).build(); - Response response = this.prometheusHttpClient.newCall(request).execute(); + Response response = AccessController.doPrivilegedChecked(() -> this.prometheusHttpClient.newCall(request).execute()); JSONObject jsonObject = readResponse(response); return toListOfLabels(jsonObject.getJSONArray("data")); } @@ -175,7 +176,7 @@ public Map> getAllMetrics(Map query "%s/api/v1/metadata%s", prometheusUri.toString().replaceAll("/$", ""), queryString); logger.debug("queryUrl: " + queryUrl); Request request = new Request.Builder().url(queryUrl).build(); - Response response = this.prometheusHttpClient.newCall(request).execute(); + Response response = AccessController.doPrivilegedChecked(() -> this.prometheusHttpClient.newCall(request).execute()); JSONObject jsonObject = readResponse(response); TypeReference>> typeRef = new TypeReference<>() {}; return new ObjectMapper().readValue(jsonObject.getJSONObject("data").toString(), typeRef); @@ -194,7 +195,7 @@ public List> getSeries(Map queryParams) thro "%s/api/v1/series%s", prometheusUri.toString().replaceAll("/$", ""), queryString); logger.debug("queryUrl: " + queryUrl); Request request = new Request.Builder().url(queryUrl).build(); - Response response = this.prometheusHttpClient.newCall(request).execute(); + Response response = AccessController.doPrivilegedChecked(() -> this.prometheusHttpClient.newCall(request).execute()); JSONObject jsonObject = readResponse(response); JSONArray dataArray = jsonObject.getJSONArray("data"); return toListOfSeries(dataArray); @@ -211,7 +212,7 @@ public JSONArray queryExemplars(String query, Long start, Long end) throws IOExc end); logger.debug("queryUrl: " + queryUrl); Request request = new Request.Builder().url(queryUrl).build(); - Response response = this.prometheusHttpClient.newCall(request).execute(); + Response response = AccessController.doPrivilegedChecked(() -> this.prometheusHttpClient.newCall(request).execute()); JSONObject jsonObject = readResponse(response); return jsonObject.getJSONArray("data"); } @@ -222,7 +223,7 @@ public JSONObject getAlerts() throws IOException { String.format("%s/api/v1/alerts", prometheusUri.toString().replaceAll("/$", "")); logger.debug("Making Prometheus alerts request: {}", queryUrl); Request request = new Request.Builder().url(queryUrl).build(); - Response response = this.prometheusHttpClient.newCall(request).execute(); + Response response = AccessController.doPrivilegedChecked(() -> this.prometheusHttpClient.newCall(request).execute()); JSONObject jsonObject = readResponse(response); return jsonObject.getJSONObject("data"); } @@ -235,7 +236,7 @@ public JSONObject getRules(Map queryParams) throws IOException { "%s/api/v1/rules%s", prometheusUri.toString().replaceAll("/$", ""), queryString); logger.debug("Making Prometheus rules request: {}", queryUrl); Request request = new Request.Builder().url(queryUrl).build(); - Response response = this.prometheusHttpClient.newCall(request).execute(); + Response response = AccessController.doPrivilegedChecked(() -> this.prometheusHttpClient.newCall(request).execute()); JSONObject jsonObject = readResponse(response); return jsonObject.getJSONObject("data"); } @@ -248,7 +249,7 @@ public JSONArray getAlertmanagerAlerts(Map queryParams) throws I logger.debug("Making Alertmanager alerts request: {}", queryUrl); 
Request request = new Request.Builder().url(queryUrl).build(); - Response response = this.alertmanagerHttpClient.newCall(request).execute(); + Response response = AccessController.doPrivilegedChecked(() -> this.alertmanagerHttpClient.newCall(request).execute()); return readAlertmanagerResponse(response); } @@ -261,7 +262,7 @@ public JSONArray getAlertmanagerAlertGroups(Map queryParams) thr logger.debug("Making Alertmanager alert groups request: {}", queryUrl); Request request = new Request.Builder().url(queryUrl).build(); - Response response = this.alertmanagerHttpClient.newCall(request).execute(); + Response response = AccessController.doPrivilegedChecked(() -> this.alertmanagerHttpClient.newCall(request).execute()); return readAlertmanagerResponse(response); } @@ -273,7 +274,7 @@ public JSONArray getAlertmanagerReceivers() throws IOException { logger.debug("Making Alertmanager receivers request: {}", queryUrl); Request request = new Request.Builder().url(queryUrl).build(); - Response response = this.alertmanagerHttpClient.newCall(request).execute(); + Response response = AccessController.doPrivilegedChecked(() -> this.alertmanagerHttpClient.newCall(request).execute()); return readAlertmanagerResponse(response); } @@ -285,7 +286,7 @@ public JSONArray getAlertmanagerSilences() throws IOException { logger.debug("Making Get Alertmanager silences request: {}", queryUrl); Request request = new Request.Builder().url(queryUrl).build(); - Response response = this.alertmanagerHttpClient.newCall(request).execute(); + Response response = AccessController.doPrivilegedChecked(() -> this.alertmanagerHttpClient.newCall(request).execute()); return readAlertmanagerResponse(response); } @@ -301,7 +302,7 @@ public String createAlertmanagerSilences(String silenceJson) throws IOException .header("Content-Type", "application/json") .post(RequestBody.create(silenceJson.getBytes(StandardCharsets.UTF_8))) .build(); - Response response = this.alertmanagerHttpClient.newCall(request).execute(); + Response response = AccessController.doPrivilegedChecked(() -> this.alertmanagerHttpClient.newCall(request).execute()); if (response.isSuccessful()) { return Objects.requireNonNull(response.body()).string(); diff --git a/docs/category.json b/docs/category.json index 094768d1e6f..bcf73cb1a82 100644 --- a/docs/category.json +++ b/docs/category.json @@ -17,12 +17,14 @@ "user/ppl/cmd/describe.md", "user/ppl/cmd/eventstats.md", "user/ppl/cmd/eval.md", + "user/ppl/cmd/fieldformat.md", "user/ppl/cmd/fields.md", "user/ppl/cmd/fillnull.md", "user/ppl/cmd/grok.md", "user/ppl/cmd/head.md", "user/ppl/cmd/join.md", "user/ppl/cmd/lookup.md", + "user/ppl/cmd/mvcombine.md", "user/ppl/cmd/parse.md", "user/ppl/cmd/patterns.md", "user/ppl/cmd/rare.md", @@ -43,6 +45,7 @@ "user/ppl/cmd/timechart.md", "user/ppl/cmd/top.md", "user/ppl/cmd/trendline.md", + "user/ppl/cmd/transpose.md", "user/ppl/cmd/where.md", "user/ppl/functions/aggregations.md", "user/ppl/functions/collection.md", diff --git a/docs/dev/ppl-commands.md b/docs/dev/ppl-commands.md index 5c3538883a9..617f7870456 100644 --- a/docs/dev/ppl-commands.md +++ b/docs/dev/ppl-commands.md @@ -52,7 +52,7 @@ If you are working on contributing a new PPL command, please read this guide and - Add a test in `PPLQueryDataAnonymizerTest` - [ ] **Cross-cluster Tests (optional, nice to have):** - - Add a test in `CrossClusterSearchIT` + - Add a test in `CrossClusterSearchIT`, or in `CalciteCrossClusterSearchIT` if the command requires Calcite. 
- [ ] **User doc:** - Add a xxx.md under `docs/user/ppl/cmd` and link the new doc to `docs/user/ppl/index.md` diff --git a/docs/user/dql/metadata.rst b/docs/user/dql/metadata.rst index e959a69c8b6..e4f55ef1b3e 100644 --- a/docs/user/dql/metadata.rst +++ b/docs/user/dql/metadata.rst @@ -35,7 +35,7 @@ Example 1: Show All Indices Information SQL query:: os> SHOW TABLES LIKE '%' - fetched rows / total rows = 23/23 + fetched rows / total rows = 24/24 +----------------+-------------+-------------------+------------+---------+----------+------------+-----------+---------------------------+----------------+ | TABLE_CAT | TABLE_SCHEM | TABLE_NAME | TABLE_TYPE | REMARKS | TYPE_CAT | TYPE_SCHEM | TYPE_NAME | SELF_REFERENCING_COL_NAME | REF_GENERATION | |----------------+-------------+-------------------+------------+---------+----------+------------+-----------+---------------------------+----------------| @@ -48,6 +48,7 @@ SQL query:: | docTestCluster | null | events_many_hosts | BASE TABLE | null | null | null | null | null | null | | docTestCluster | null | events_null | BASE TABLE | null | null | null | null | null | null | | docTestCluster | null | json_test | BASE TABLE | null | null | null | null | null | null | + | docTestCluster | null | mvcombine_data | BASE TABLE | null | null | null | null | null | null | | docTestCluster | null | nested | BASE TABLE | null | null | null | null | null | null | | docTestCluster | null | nyc_taxi | BASE TABLE | null | null | null | null | null | null | | docTestCluster | null | occupation | BASE TABLE | null | null | null | null | null | null | diff --git a/docs/user/ppl/cmd/fieldformat.md b/docs/user/ppl/cmd/fieldformat.md new file mode 100644 index 00000000000..cf03205794c --- /dev/null +++ b/docs/user/ppl/cmd/fieldformat.md @@ -0,0 +1,128 @@ + +# fieldformat + +The `fieldformat` command evaluates an expression and appends the result to the search results as the specified field. The command is an alias of the `eval` command. +Additionally, it supports the dot (`.`) string concatenation operator: a string literal placed before and/or after the expression is concatenated to the evaluated value. + + +## Syntax + +The `fieldformat` command has the following syntax: + +```syntax + fieldformat <field> = [(prefix).]<expression>[.(suffix)] ["," <field> = [(prefix).]<expression>[.(suffix)] ]... + +``` + +## Parameters + +The `fieldformat` command supports the following parameters. + +| Parameter | Required/Optional | Description | |----------------|-------------------|-----------------------------------------------------------------------------------------------------------------------------------------------| | `<field>` | Required | The name of the field to create or update. If the field does not exist, a new field is added. If it already exists, its value is overwritten. | | `<expression>` | Required | The expression to evaluate. The expression can have a prefix and/or suffix string part that is concatenated to its evaluated value. | | `prefix` | Optional | A string literal before the expression, joined by the dot operator, that is concatenated as a prefix to the evaluated expression value. | | `suffix` | Optional | A string literal after the expression, joined by the dot operator, that is concatenated as a suffix to the evaluated expression value.
| + + +## Example 1: Create a new field + +The following query creates a new `doubleAge` field for each document: + +```ppl +source=accounts +| fieldformat doubleAge = age * 2 +| fields age, doubleAge +``` + +The query returns the following results: + +```text +fetched rows / total rows = 4/4 ++-----+-----------+ +| age | doubleAge | +|-----+-----------| +| 32 | 64 | +| 36 | 72 | +| 28 | 56 | +| 33 | 66 | ++-----+-----------+ +``` + + +## Example 2: Override an existing field + +The following query overrides the `age` field by adding `1` to its value: +
+```ppl +source=accounts +| fieldformat age = age + 1 +| fields age +``` + +The query returns the following results: + +```text +fetched rows / total rows = 4/4 ++-----+ +| age | +|-----| +| 33 | +| 37 | +| 29 | +| 34 | ++-----+ +``` + + + + +## Example 3: String concatenation with prefix + +The following query uses the `.` (dot) operator for string concatenation. You can concatenate string literals and field values as follows: + +```ppl +source=accounts +| fieldformat greeting = 'Hello '.tostring(firstname) +| fields firstname, greeting +``` + +The query returns the following results: + +```text +fetched rows / total rows = 4/4 ++-----------+---------------+ +| firstname | greeting | +|-----------+---------------| +| Amber | Hello Amber | +| Hattie | Hello Hattie | +| Nanette | Hello Nanette | +| Dale | Hello Dale | ++-----------+---------------+ +``` + + +## Example 4: String concatenation with dot operator, prefix and suffix + +The following query performs prefix and suffix string concatenation using the dot operator: + +```ppl +source=accounts | fieldformat age_info = 'Age: '.CAST(age AS STRING).' years.' | fields firstname, age, age_info +``` + +The query returns the following results: + +```text +fetched rows / total rows = 4/4 ++-----------+-----+----------------+ +| firstname | age | age_info | +|-----------+-----+----------------| +| Amber | 32 | Age: 32 years. | +| Hattie | 36 | Age: 36 years. | +| Nanette | 28 | Age: 28 years. | +| Dale | 33 | Age: 33 years. | ++-----------+-----+----------------+ +``` + + diff --git a/docs/user/ppl/cmd/mvcombine.md b/docs/user/ppl/cmd/mvcombine.md new file mode 100644 index 00000000000..4ccad724ca7 --- /dev/null +++ b/docs/user/ppl/cmd/mvcombine.md @@ -0,0 +1,129 @@ +# mvcombine + +## Description + +The `mvcombine` command groups rows that are identical across all fields except a specified target field, and combines the values of that target field into a multivalue (array) field. All other fields in the input rows are preserved as group keys in the output. + +`mvcombine` is a transforming command: it consumes a set of input results and produces a new result set with reduced cardinality. + +### Key behaviors + +- Rows are grouped by **all fields currently in the pipeline except the target field**. +- One output row is produced per group. +- The target field is **replaced** with a multivalue (array) field that contains all non-null values of the target field from the grouped rows. +- Rows where the target field is missing or null do **not** contribute a value to the combined multivalue output. +- The default output is a multivalue representation (array). + +--- + +## Syntax + +mvcombine <field> + +### Arguments + +- **field** (required) + The name of the field whose values are combined into a multivalue field.
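+- **delim** (optional)
+  A delimiter string that the grammar in this change also accepts, defaulting to a single space. This reading is drawn from the `MvCombine` AST node and the `mvcombine age delim=','` query exercised in `CalciteExplainIT`, both in this diff; the Calcite lowering combines values with `ARRAY_AGG`, so the delimiter does not change the array output shown in the examples below. A sketch of the accepted syntax, reusing the Example 2 data:
+
+```ppl
+source=mvcombine_data
+| where bytes=700 and tags='t7'
+| fields ip, bytes, tags, packets_str
+| mvcombine packets_str delim=','
+```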
+ +--- + +## Example 1: Basic mvcombine + +Given the following input rows: + +```text +{"ip":"10.0.0.1","bytes":100,"tags":"t1","packets_str":"10"} +{"ip":"10.0.0.1","bytes":100,"tags":"t1","packets_str":"20"} +{"ip":"10.0.0.1","bytes":100,"tags":"t1","packets_str":"30"} +``` +The following query collapses the three rows into a single row, and combines packets_str into a multivalue field: + +```ppl +source=mvcombine_data +| where ip='10.0.0.1' and bytes=100 and tags='t1' +| fields ip, bytes, tags, packets_str +| mvcombine packets_str +``` + +Expected output: +```text +fetched rows / total rows = 1/1 ++----------+-------+------+-------------+ +| ip | bytes | tags | packets_str | +|----------+-------+------+-------------| +| 10.0.0.1 | 100 | t1 | [10,20,30] | ++----------+-------+------+-------------+ +``` + +## Example 2: Multiple groups + +Given a dataset mvcombine with the following data: +```text +{"ip":"10.0.0.7","bytes":700,"tags":"t7","packets_str":"1"} +{"ip":"10.0.0.7","bytes":700,"tags":"t7","packets_str":"2"} +{"ip":"10.0.0.8","bytes":700,"tags":"t7","packets_str":"9"} +``` + +The following query produces one output row per group key: +```ppl +source=mvcombine_data +| where bytes=700 and tags='t7' +| fields ip, bytes, tags, packets_str +| sort ip, packets_str +| mvcombine packets_str +| sort ip +``` + +Expected output: +```text +fetched rows / total rows = 2/2 ++----------+-------+------+-------------+ +| ip | bytes | tags | packets_str | +|----------+-------+------+-------------| +| 10.0.0.7 | 700 | t7 | [1,2] | +| 10.0.0.8 | 700 | t7 | [9] | ++----------+-------+------+-------------+ +``` + +## Example 3: Missing target field in some rows + +Rows missing the target field do not contribute a value to the combined output. + +Given a dataset mvcombine with the following data: +```text +{"ip":"10.0.0.3","bytes":300,"tags":"t3","packets_str":"5"} +{"ip":"10.0.0.3","bytes":300,"tags":"t3"} +{"ip":"10.0.0.3","bytes":300,"tags":"t3","letters":"a"} +``` + +The following query collapses the group and preserves the non-missing value: +```ppl +source=mvcombine_data +| where ip='10.0.0.3' and bytes=300 and tags='t3' +| fields ip, bytes, tags, packets_str +| mvcombine packets_str +``` + +Expected output: +```text +fetched rows / total rows = 1/1 ++----------+-------+------+-------------+ +| ip | bytes | tags | packets_str | +|----------+-------+------+-------------| +| 10.0.0.3 | 300 | t3 | [5] | ++----------+-------+------+-------------+ +``` + +## Example 4: Error when field does not exist + +If the specified field does not exist in the current schema, mvcombine returns an error. +```ppl +source=mvcombine_data +| mvcombine does_not_exist +``` + +Expected output: +```text +{'reason': 'Invalid Query', 'details': 'Field [does_not_exist] not found.', 'type': 'IllegalArgumentException'} +Error: Query returned no data +``` \ No newline at end of file diff --git a/docs/user/ppl/cmd/spath.md b/docs/user/ppl/cmd/spath.md index f107b0df030..11d251a9576 100644 --- a/docs/user/ppl/cmd/spath.md +++ b/docs/user/ppl/cmd/spath.md @@ -41,7 +41,7 @@ For more information about path syntax, see [json_extract](../functions/json.md# * **Limitation**: Field order in the result could be inconsistent with query without `spath` command, and the behavior might change in the future version. * **Limitation**: Filter with subquery (`where in/exists [...]`) is not supported with `spath` command. * **Limitation**: `fillnull` command requires to specify fields when used with `spath` command. 
-* **Limitation**: Following commands cannot be used together with `spath` command: `appendcol`, `multisearch`, `lookup`. +* **Limitation**: Following commands cannot be used together with `spath` command: `lookup`. * **Performance**: Filter records before `spath` command for best performance (see Example 8) * **Internal Implementation**: The auto extraction feature uses an internal `_MAP` system column to store dynamic fields during query processing. This column is automatically expanded into individual columns in the final results and users don't need to reference it directly. For more information, see [System Columns](../general/identifiers.md#system-columns). diff --git a/docs/user/ppl/cmd/transpose.md b/docs/user/ppl/cmd/transpose.md new file mode 100644 index 00000000000..442d4b9716d --- /dev/null +++ b/docs/user/ppl/cmd/transpose.md @@ -0,0 +1,92 @@ +# transpose + +## Description + +The `transpose` command outputs the requested number of rows as columns, effectively transposing each result row into a corresponding column of field values. + +## Syntax + +transpose [int] [column_name=<string>] + +* number-of-rows: optional. The number of rows to transpose into columns. Default value is 5. Maximum allowed is 10000. +* column_name: optional. The name of the first column to use when transposing rows. This column holds the field names. + + +## Example 1: Transpose results + +This example shows transposing without any parameters. It transposes up to 5 rows into columns, since the default is 5. + +```ppl +source=accounts +| head 5 +| fields account_number, firstname, lastname, balance +| transpose +``` + +Expected output: + +```text +fetched rows / total rows = 4/4 ++----------------+-------+--------+---------+-------+-------+ +| column | row 1 | row 2 | row 3 | row 4 | row 5 | +|----------------+-------+--------+---------+-------+-------| +| account_number | 1 | 6 | 13 | 18 | null | +| firstname | Amber | Hattie | Nanette | Dale | null | +| balance | 39225 | 5686 | 32838 | 4180 | null | +| lastname | Duke | Bond | Bates | Adams | null | ++----------------+-------+--------+---------+-------+-------+ +``` + +## Example 2: Transpose results up to a provided number of rows + +This example specifies the number of rows explicitly. It transposes up to 4 rows into columns. + +```ppl +source=accounts +| head 5 +| fields account_number, firstname, lastname, balance +| transpose 4 +``` + +Expected output: + +```text +fetched rows / total rows = 4/4 ++----------------+-------+--------+---------+-------+ +| column | row 1 | row 2 | row 3 | row 4 | +|----------------+-------+--------+---------+-------| +| account_number | 1 | 6 | 13 | 18 | +| firstname | Amber | Hattie | Nanette | Dale | +| balance | 39225 | 5686 | 32838 | 4180 | +| lastname | Duke | Bond | Bates | Adams | ++----------------+-------+--------+---------+-------+ +``` + +## Example 3: Transpose results up to a provided number of rows, naming the first column + +This example specifies both the number of rows and `column_name`. It transposes up to 4 rows into columns and names the first column `column_names`.
+ +```ppl +source=accounts +| head 5 +| fields account_number, firstname, lastname, balance +| transpose 4 column_name='column_names' +``` + +Expected output: + +```text +fetched rows / total rows = 4/4 ++----------------+-------+--------+---------+-------+ +| column_names | row 1 | row 2 | row 3 | row 4 | +|----------------+-------+--------+---------+-------| +| account_number | 1 | 6 | 13 | 18 | +| firstname | Amber | Hattie | Nanette | Dale | +| balance | 39225 | 5686 | 32838 | 4180 | +| lastname | Duke | Bond | Bates | Adams | ++----------------+-------+--------+---------+-------+ +``` + +## Limitations + +The `transpose` command transposes up to the specified number of rows; if fewer rows are available, the remaining transposed row columns are filled with null values. \ No newline at end of file diff --git a/docs/user/ppl/index.md b/docs/user/ppl/index.md index 30ad7159182..12afe96eea0 100644 --- a/docs/user/ppl/index.md +++ b/docs/user/ppl/index.md @@ -78,9 +78,11 @@ source=accounts | [describe command](cmd/describe.md) | 2.1 | stable (since 2.1) | Query the metadata of an index. | | [explain command](cmd/explain.md) | 3.1 | stable (since 3.1) | Explain the plan of query. | | [show datasources command](cmd/showdatasources.md) | 2.4 | stable (since 2.4) | Query datasources configured in the PPL engine. | - | [addtotals command](cmd/addtotals.md) | 3.4 | stable (since 3.4) | Adds row and column values and appends a totals column and row. | - | [addcoltotals command](cmd/addcoltotals.md) | 3.4 | stable (since 3.4) | Adds column values and appends a totals row. | - +| [addtotals command](cmd/addtotals.md) | 3.5 | stable (since 3.5) | Adds row and column values and appends a totals column and row. | +| [addcoltotals command](cmd/addcoltotals.md) | 3.5 | stable (since 3.5) | Adds column values and appends a totals row. | +| [transpose command](cmd/transpose.md) | 3.5 | stable (since 3.5) | Transpose rows to columns. | +| [mvcombine command](cmd/mvcombine.md) | 3.5 | stable (since 3.5) | Combines values of a specified field across rows identical on all other fields.
| + - [Syntax](cmd/syntax.md) - PPL query structure and command syntax formatting * **Functions** - [Aggregation Functions](functions/aggregations.md) diff --git a/doctest/test_data/mvcombine.json b/doctest/test_data/mvcombine.json new file mode 100644 index 00000000000..60c08ebd8a9 --- /dev/null +++ b/doctest/test_data/mvcombine.json @@ -0,0 +1,18 @@ +{"ip":"10.0.0.1","bytes":100,"tags":"t1","packets_str":"10"} +{"ip":"10.0.0.1","bytes":100,"tags":"t1","packets_str":"20"} +{"ip":"10.0.0.1","bytes":100,"tags":"t1","packets_str":"30"} +{"ip":"10.0.0.2","bytes":200,"tags":"t2","packets_str":"7"} +{"ip":"10.0.0.3","bytes":300,"tags":"t3","packets_str":"5"} +{"ip":"10.0.0.3","bytes":300,"tags":"t3"} +{"ip":"10.0.0.3","bytes":300,"tags":"t3","letters":"a"} +{"ip":"10.0.0.7","bytes":700,"tags":"t7","packets_str":"1"} +{"ip":"10.0.0.7","bytes":700,"tags":"t7","packets_str":"2"} +{"ip":"10.0.0.8","bytes":700,"tags":"t7","packets_str":"9"} +{"ip":"10.0.0.9","bytes":900,"tags":"t9","packets_str":"1"} +{"ip":"10.0.0.9","bytes":900,"tags":"t9","packets_str":"2"} +{"ip":"10.0.0.9","bytes":900,"tags":"t9","packets_str":"3"} +{"ip":"10.0.0.5","bytes":500,"tags":"t5","packets_str":"dup"} +{"ip":"10.0.0.5","bytes":500,"tags":"t5","packets_str":"dup"} +{"ip":"10.0.0.5","bytes":500,"tags":"t5","packets_str":"x"} +{"ip":"10.0.0.6","bytes":600,"tags":"t6","packets_str":""} +{"ip":"10.0.0.6","bytes":600,"tags":"t6","packets_str":"z"} diff --git a/doctest/test_docs.py b/doctest/test_docs.py index e57c41d6827..6283252065f 100644 --- a/doctest/test_docs.py +++ b/doctest/test_docs.py @@ -57,7 +57,8 @@ 'otellogs': 'otellogs.json', 'time_data': 'time_test_data.json', 'time_data2': 'time_test_data2.json', - 'time_test': 'time_test.json' + 'time_test': 'time_test.json', + 'mvcombine_data': 'mvcombine.json', } DEBUG_MODE = os.environ.get('DOCTEST_DEBUG', 'false').lower() == 'true' diff --git a/doctest/test_mapping/mvcombine.json b/doctest/test_mapping/mvcombine.json new file mode 100644 index 00000000000..06e00747e1f --- /dev/null +++ b/doctest/test_mapping/mvcombine.json @@ -0,0 +1,12 @@ +{ + "mappings": { + "properties": { + "case": { "type": "keyword" }, + "ip": { "type": "ip" }, + "bytes": { "type": "long" }, + "tags": { "type": "keyword" }, + "packets_str": { "type": "keyword" }, + "letters": { "type": "keyword" } + } + } +} diff --git a/integ-test/build.gradle b/integ-test/build.gradle index 21a28f2faf8..b914cb0cbe0 100644 --- a/integ-test/build.gradle +++ b/integ-test/build.gradle @@ -406,7 +406,6 @@ task integTestWithSecurity(type: RestIntegTestTask) { configureSecurityPlugin(cluster) } - useJUnitPlatform() dependsOn ':opensearch-sql-plugin:bundlePlugin' testLogging { events "passed", "skipped", "failed" @@ -447,11 +446,8 @@ task integTestWithSecurity(type: RestIntegTestTask) { jvmArgs '-agentlib:jdwp=transport=dt_socket,server=y,suspend=y,address=*:5005' } - // NOTE: this IT config discovers only junit5 (jupiter) tests. 
diff --git a/integ-test/build.gradle b/integ-test/build.gradle
index 21a28f2faf8..b914cb0cbe0 100644
--- a/integ-test/build.gradle
+++ b/integ-test/build.gradle
@@ -406,7 +406,6 @@ task integTestWithSecurity(type: RestIntegTestTask) {
     configureSecurityPlugin(cluster)
   }
-  useJUnitPlatform()
   dependsOn ':opensearch-sql-plugin:bundlePlugin'
   testLogging {
     events "passed", "skipped", "failed"
   }
@@ -447,11 +446,8 @@ task integTestWithSecurity(type: RestIntegTestTask) {
     jvmArgs '-agentlib:jdwp=transport=dt_socket,server=y,suspend=y,address=*:5005'
   }
-  // NOTE: this IT config discovers only junit5 (jupiter) tests.
-  // https://github.com/opensearch-project/sql/issues/1974
   filter {
-    includeTestsMatching 'org.opensearch.sql.security.CrossClusterSearchIT'
-    includeTestsMatching 'org.opensearch.sql.security.PPLPermissionsIT'
+    includeTestsMatching 'org.opensearch.sql.security.*'
   }
 }
diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/CalciteNoPushdownIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/CalciteNoPushdownIT.java
index 22a6f6b5916..50cdd8d847c 100644
--- a/integ-test/src/test/java/org/opensearch/sql/calcite/CalciteNoPushdownIT.java
+++ b/integ-test/src/test/java/org/opensearch/sql/calcite/CalciteNoPushdownIT.java
@@ -34,6 +34,7 @@
   CalciteDedupCommandIT.class,
   CalciteDescribeCommandIT.class,
   CalciteExpandCommandIT.class,
+  CalciteFieldFormatCommandIT.class,
   CalciteFieldsCommandIT.class,
   CalciteFillNullCommandIT.class,
   CalciteFlattenCommandIT.class,
@@ -104,9 +105,11 @@
   CalciteTextFunctionIT.class,
   CalciteTopCommandIT.class,
   CalciteTrendlineCommandIT.class,
+  CalciteTransposeCommandIT.class,
   CalciteVisualizationFormatIT.class,
   CalciteWhereCommandIT.class,
-  CalcitePPLTpchIT.class
+  CalcitePPLTpchIT.class,
+  CalciteMvCombineCommandIT.class
 })
 public class CalciteNoPushdownIT {
   private static boolean wasPushdownEnabled;
diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java
index d55ca8d82b0..6c25242c110 100644
--- a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java
+++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java
@@ -25,6 +25,7 @@
 import java.io.IOException;
 import java.util.Locale;
+import org.apache.commons.text.StringEscapeUtils;
 import org.junit.Ignore;
 import org.junit.Test;
 import org.opensearch.sql.ast.statement.ExplainMode;
@@ -2050,6 +2051,18 @@ public void testaddColTotalsExplain() throws IOException {
             + "| addcoltotals balance age label='GrandTotal'"));
   }
 
+  @Test
+  public void testTransposeExplain() throws IOException {
+    enabledOnlyWhenPushdownIsEnabled();
+    String expected = loadExpectedPlan("explain_transpose.yaml");
+    assertYamlEqualsIgnoreId(
+        expected,
+        explainQueryYaml(
+            "source=opensearch-sql_test_index_account "
+                + "| head 5 "
+                + "| transpose 4 column_name='column_names'"));
+  }
+
   public void testComplexDedup() throws IOException {
     enabledOnlyWhenPushdownIsEnabled();
     String expected = loadExpectedPlan("explain_dedup_complex1.yaml");
@@ -2473,4 +2486,31 @@ public void testAggFilterOnNestedFields() throws IOException {
                 "source=%s | stats count(eval(author.name < 'K')) as george_and_jk",
                 TEST_INDEX_CASCADED_NESTED)));
   }
+
+  @Test
+  public void testExplainMvCombine() throws IOException {
+    String query =
+        "source=opensearch-sql_test_index_account "
+            + "| fields state, city, age "
+            + "| mvcombine age delim=','";
+
+    String actual = explainQueryYaml(query);
+    String expected = loadExpectedPlan("explain_mvcombine.yaml");
+    assertYamlEqualsIgnoreId(expected, actual);
+  }
+
+  @Test
+  public void testFieldFormatExplain() throws Exception {
+    enabledOnlyWhenPushdownIsEnabled();
+    String expected = loadExpectedPlan("explain_field_format.yaml");
+    assertYamlEqualsIgnoreId(
+        expected,
+        explainQueryYaml(
+            StringEscapeUtils.escapeJson(
+                StringUtils.format(
+                    "source=%s | head 5 | fieldformat formatted_balance ="
+                        + " \"$\".tostring(balance,\"commas\") ",
+                    TEST_INDEX_ACCOUNT))));
+  }
 }
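Before the new command IT below, a minimal sketch of the `fieldformat` syntax that `testFieldFormatExplain` above exercises: the dot operator concatenates strings, and `tostring(..., "commas")` renders a number with thousands separators (names as used in this patch).

```ppl
source=accounts
| head 5
| fieldformat formatted_balance = "$".tostring(balance,"commas")
```

A balance of 39225 comes back as $39,225; the expected plan file explain_field_format.yaml later in this patch renders the same expression as ||('$', TOSTRING($3, 'commas')).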
diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteFieldFormatCommandIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteFieldFormatCommandIT.java
new file mode 100644
index 00000000000..86f87c90c81
--- /dev/null
+++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteFieldFormatCommandIT.java
@@ -0,0 +1,122 @@
+/*
+ * Copyright OpenSearch Contributors
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+package org.opensearch.sql.calcite.remote;
+
+import static org.opensearch.sql.util.MatcherUtils.*;
+
+import java.io.IOException;
+import org.apache.commons.text.StringEscapeUtils;
+import org.json.JSONObject;
+import org.junit.jupiter.api.Test;
+import org.opensearch.client.Request;
+import org.opensearch.sql.ppl.PPLIntegTestCase;
+
+public class CalciteFieldFormatCommandIT extends PPLIntegTestCase {
+
+  @Override
+  public void init() throws Exception {
+    super.init();
+    enableCalcite();
+
+    loadIndex(Index.BANK);
+
+    // Create test data for string concatenation
+    Request request1 = new Request("PUT", "/test_eval/_doc/1?refresh=true");
+    request1.setJsonEntity("{\"name\": \"Alice\", \"age\": 25, \"title\": \"Engineer\"}");
+    client().performRequest(request1);
+
+    Request request2 = new Request("PUT", "/test_eval/_doc/2?refresh=true");
+    request2.setJsonEntity("{\"name\": \"Bob\", \"age\": 30, \"title\": \"Manager\"}");
+    client().performRequest(request2);
+
+    Request request3 = new Request("PUT", "/test_eval/_doc/3?refresh=true");
+    request3.setJsonEntity("{\"name\": \"Charlie\", \"age\": null, \"title\": \"Analyst\"}");
+    client().performRequest(request3);
+  }
+
+  @Test
+  public void testFieldFormatStringConcatenation() throws IOException {
+    JSONObject result =
+        executeQuery(
+            StringEscapeUtils.escapeJson(
+                "source=test_eval | fieldformat greeting = 'Hello ' + name"));
+    verifySchema(
+        result,
+        schema("name", "string"),
+        schema("title", "string"),
+        schema("age", "bigint"),
+        schema("greeting", "string"));
+    verifyDataRows(
+        result,
+        rows("Alice", "Engineer", 25, "Hello Alice"),
+        rows("Bob", "Manager", 30, "Hello Bob"),
+        rows("Charlie", "Analyst", null, "Hello Charlie"));
+  }
+
+  @Test
+  public void testFieldFormatStringConcatenationWithNullFieldToString() throws IOException {
+    JSONObject result =
+        executeQuery(
+            StringEscapeUtils.escapeJson(
+                "source=test_eval | fieldformat age_desc = \"Age: \".tostring(age,\"commas\") |"
+                    + " fields name, age, age_desc"));
+    verifySchema(
+        result, schema("name", "string"), schema("age", "bigint"), schema("age_desc", "string"));
+    verifyDataRows(
+        result,
+        rows("Alice", 25, "Age: 25"),
+        rows("Bob", 30, "Age: 30"),
+        rows("Charlie", null, null));
+  }
+
+  @Test
+  public void testFieldFormatStringConcatenationWithNullField() throws IOException {
+    JSONObject result =
+        executeQuery(
+            StringEscapeUtils.escapeJson(
+                "source=test_eval | fieldformat age_desc = \"Age: \".CAST(age AS STRING) | fields"
+                    + " name, age, age_desc"));
+    verifySchema(
+        result, schema("name", "string"), schema("age", "bigint"), schema("age_desc", "string"));
+    verifyDataRows(
+        result,
+        rows("Alice", 25, "Age: 25"),
+        rows("Bob", 30, "Age: 30"),
+        rows("Charlie", null, null));
+  }
+
+  @Test
+  public void testFieldFormatStringConcatWithSuffix() throws IOException {
+    JSONObject result =
+        executeQuery(
+            StringEscapeUtils.escapeJson(
+                "source=test_eval | fieldformat age_desc = CAST(age AS STRING).\" years\" | fields"
+                    + " name, age, age_desc"));
+    verifySchema(
+        result, schema("name", "string"),
schema("age", "bigint"), schema("age_desc", "string")); + verifyDataRows( + result, + rows("Alice", 25, "25 years"), + rows("Bob", 30, "30 years"), + rows("Charlie", null, null)); + } + + @Test + public void testFieldFormatStringConcatWithPrefixSuffix() throws IOException { + JSONObject result = + executeQuery( + StringEscapeUtils.escapeJson( + "source=test_eval | fieldformat age_desc = \"Age: \".CAST(age AS STRING).\" years\"" + + " | fields name, age, age_desc")); + verifySchema( + result, schema("name", "string"), schema("age", "bigint"), schema("age_desc", "string")); + verifyDataRows( + result, + rows("Alice", 25, "Age: 25 years"), + rows("Bob", 30, "Age: 30 years"), + rows("Charlie", null, null)); + } +} diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteMvCombineCommandIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteMvCombineCommandIT.java new file mode 100644 index 00000000000..1cf535bd71b --- /dev/null +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteMvCombineCommandIT.java @@ -0,0 +1,256 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.calcite.remote; + +import static org.opensearch.sql.util.MatcherUtils.rows; +import static org.opensearch.sql.util.MatcherUtils.schema; +import static org.opensearch.sql.util.MatcherUtils.verifyDataRows; +import static org.opensearch.sql.util.MatcherUtils.verifyNumOfRows; +import static org.opensearch.sql.util.MatcherUtils.verifySchema; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; +import org.json.JSONArray; +import org.json.JSONObject; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; +import org.opensearch.client.ResponseException; +import org.opensearch.sql.ppl.PPLIntegTestCase; + +public class CalciteMvCombineCommandIT extends PPLIntegTestCase { + + private static final String INDEX = Index.MVCOMBINE.getName(); + + @Override + public void init() throws Exception { + super.init(); + enableCalcite(); + loadIndex(Index.MVCOMBINE); + } + + // --------------------------- + // Sanity (precondition) + // --------------------------- + + @Test + public void testSanity_datasetIsLoaded() throws IOException { + JSONObject result = executeQuery("source=" + INDEX + " | head 5"); + int rows = result.getJSONArray("datarows").length(); + Assertions.assertTrue(rows > 0, "Expected MVCOMBINE dataset to have rows, got 0"); + } + + // --------------------------- + // Happy path (core mvcombine) + // --------------------------- + + @Test + public void testMvCombine_basicGroupCollapsesToOneRow() throws IOException { + String q = + "source=" + + INDEX + + " | where ip='10.0.0.1' and bytes=100 and tags='t1'" + + " | fields ip, bytes, tags, packets_str" + + " | mvcombine packets_str"; + + JSONObject result = executeQuery(q); + + verifyNumOfRows(result, 1); + + verifySchema( + result, + schema("ip", null, "string"), + schema("bytes", null, "bigint"), + schema("tags", null, "string"), + schema("packets_str", null, "array")); + + verifyDataRows(result, rows("10.0.0.1", 100, "t1", List.of("10", "20", "30"))); + } + + @Test + public void testMvCombine_singleRowGroupStaysSingleRow() throws IOException { + String q = + "source=" + + INDEX + + " | where ip='10.0.0.2' and bytes=200 and tags='t2'" + + " | fields ip, tags, packets_str" + + " | mvcombine packets_str"; + + JSONObject result = executeQuery(q); + + verifySchema( + result, + 
schema("ip", null, "string"),
+        schema("tags", null, "string"),
+        schema("packets_str", null, "array"));
+
+    verifyDataRows(result, rows("10.0.0.2", "t2", new JSONArray().put("7")));
+  }
+
+  @Test
+  public void testMvCombine_missingTargetWithinGroup_collapses_nonNullPreserved()
+      throws IOException {
+    String q =
+        "source="
+            + INDEX
+            + " | where ip='10.0.0.3' and bytes=300 and tags='t3'"
+            + " | fields ip, bytes, tags, packets_str"
+            + " | mvcombine packets_str";
+
+    JSONObject result = executeQuery(q);
+
+    verifyNumOfRows(result, 1);
+
+    verifySchema(
+        result,
+        schema("ip", null, "string"),
+        schema("bytes", null, "bigint"),
+        schema("tags", null, "string"),
+        schema("packets_str", null, "array"));
+
+    verifyDataRows(result, rows("10.0.0.3", 300, "t3", List.of("5")));
+  }
+
+  // ---------------------------
+  // Multi-group behavior
+  // ---------------------------
+
+  @Test
+  public void testMvCombine_multipleGroups_producesOneRowPerGroupKey() throws IOException {
+    String base =
+        "source="
+            + INDEX
+            + " | where (ip='10.0.0.7' or ip='10.0.0.8') and bytes=700 and tags='t7'"
+            + " | fields ip, bytes, tags, packets_str";
+
+    JSONObject result = executeQuery(base + " | mvcombine packets_str | sort ip");
+
+    verifyNumOfRows(result, 2);
+
+    verifySchema(
+        result,
+        schema("ip", null, "string"),
+        schema("bytes", null, "bigint"),
+        schema("tags", null, "string"),
+        schema("packets_str", null, "array"));
+
+    // MV contents differ per group, so the helper cannot express membership safely
+    JSONArray r0 = result.getJSONArray("datarows").getJSONArray(0);
+    JSONArray r1 = result.getJSONArray("datarows").getJSONArray(1);
+
+    List<String> mv0 = toStringListDropNulls(r0.get(3));
+    List<String> mv1 = toStringListDropNulls(r1.get(3));
+
+    Assertions.assertEquals("10.0.0.7", r0.getString(0));
+    Assertions.assertEquals("10.0.0.8", r1.getString(0));
+
+    Assertions.assertTrue(mv0.containsAll(List.of("1", "2")));
+    Assertions.assertEquals(2, mv0.size());
+    Assertions.assertEquals(List.of("9"), mv1);
+  }
+
+  // ---------------------------
+  // delim: Splunk-compatible command input + output shape
+  // ---------------------------
+
+  @Test
+  public void testMvCombine_delim_shouldNotChangeMvShape_ifSupported_elseSyntaxRejected()
+      throws Exception {
+    String base =
+        "source="
+            + INDEX
+            + " | where ip='10.0.0.9' and bytes=900 and tags='t9'"
+            + " | fields ip, bytes, tags, packets_str";
+
+    String q = base + " | mvcombine delim='|' packets_str";
+
+    try {
+      JSONObject result = executeQuery(q);
+
+      verifyNumOfRows(result, 1);
+
+      verifySchema(
+          result,
+          schema("ip", null, "string"),
+          schema("bytes", null, "bigint"),
+          schema("tags", null, "string"),
+          schema("packets_str", null, "array"));
+
+      Object cell = result.getJSONArray("datarows").getJSONArray(0).get(3);
+      Assertions.assertTrue(cell instanceof JSONArray);
+
+      List<String> mv = toStringListDropNulls(cell);
+      Assertions.assertTrue(mv.contains("1"));
+      Assertions.assertTrue(mv.contains("2"));
+      Assertions.assertTrue(mv.contains("3"));
+    } catch (ResponseException e) {
+      Assertions.assertTrue(isSyntaxBadRequest(e));
+    }
+  }
+
+  // ---------------------------
+  // Edge case / error semantics
+  // ---------------------------
+
+  @Test
+  public void testMvCombine_missingField_shouldReturn4xx() throws IOException {
+    ResponseException ex =
+        Assertions.assertThrows(
+            ResponseException.class,
+            () -> executeQuery("source=" + INDEX + " | mvcombine does_not_exist"));
+
+    int status = ex.getResponse().getStatusLine().getStatusCode();
+
+    Assertions.assertEquals(400, status, "Unexpected status. ex=" + ex.getMessage());
+
+    String msg = ex.getMessage();
+    Assertions.assertTrue(msg.contains("Field [does_not_exist] not found."), msg);
+  }
+
+  // ---------------------------
+  // Helpers
+  // ---------------------------
+
+  private static boolean isSyntaxBadRequest(ResponseException e) {
+    int status = e.getResponse().getStatusLine().getStatusCode();
+    if (status != 400) return false;
+
+    String msg = e.getMessage();
+    if (msg == null) return false;
+
+    return msg.contains("SyntaxCheckException")
+        || msg.contains("Invalid Query")
+        || msg.contains("parsing_exception")
+        || msg.contains("ParseException");
+  }
+
+  private static List<String> toStringListKeepNulls(Object cell) {
+    if (cell == null || cell == JSONObject.NULL) {
+      return Collections.emptyList();
+    }
+    if (cell instanceof JSONArray arr) {
+      List<String> out = new ArrayList<>();
+      for (int i = 0; i < arr.length(); i++) {
+        Object v = arr.get(i);
+        out.add(v == JSONObject.NULL ? null : String.valueOf(v));
+      }
+      return out;
+    }
+    return List.of(String.valueOf(cell));
+  }
+
+  private static List<String> toStringListDropNulls(Object cell) {
+    List<String> all = toStringListKeepNulls(cell);
+    if (all.isEmpty()) return all;
+
+    List<String> out = new ArrayList<>();
+    for (String v : all) {
+      if (v != null) out.add(v);
+    }
+    return out;
+  }
+}
diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLSpathCommandIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLSpathCommandIT.java
index 0c2c2e94a7e..1405df66753 100644
--- a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLSpathCommandIT.java
+++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLSpathCommandIT.java
@@ -262,4 +262,114 @@ public void testAppendWithSpathInSubsearchDynamicFields() throws IOException {
         rows(null, null, null, "simple", "4", sj("{'a': 1, 'b': 2, 'c': 3}")),
         rows("1", "3", "2", "simple", null, sj("{'a': 1, 'b': 2, 'c': 3}")));
   }
+
+  @Test
+  public void testAppendColWithSpathInMain() throws IOException {
+    JSONObject result =
+        executeQuery(
+            "source=test_json | where category='simple' | spath input=userData | appendcol [where"
+                + " category='simple'] | fields a, c, *");
+    verifySchema(
+        result,
+        schema("a", "string"),
+        schema("c", "string"),
+        schema("category", "string"),
+        schema("userData", "string"),
+        schema("b", "string"));
+    verifyDataRows(
+        result,
+        rows("1", "3", "simple", sj("{'a': 1, 'b': 2, 'c': 3}"), "2"),
+        rows("1", "3", "simple", sj("{'a': 1, 'b': 2, 'c': 3}"), "2"));
+  }
+
+  @Test
+  public void testAppendColWithSpathInSubsearch() throws IOException {
+    JSONObject result =
+        executeQuery(
+            "source=test_json | where category='simple' | appendcol [where category='simple' |"
+                + " spath input=userData] | fields a, c, *");
+    verifySchema(
+        result,
+        schema("a", "string"),
+        schema("c", "string"),
+        schema("category", "string"),
+        schema("userData", "string"),
+        schema("b", "string"));
+    verifyDataRows(
+        result,
+        rows("1", "3", "simple", sj("{'a': 1, 'b': 2, 'c': 3}"), "2"),
+        rows("1", "3", "simple", sj("{'a': 1, 'b': 2, 'c': 3}"), "2"));
+  }
+
+  @Test
+  public void testAppendColWithSpathInBothInputs() throws IOException {
+    JSONObject result =
+        executeQuery(
+            "source=test_json | where category='simple' | spath input=userData | appendcol [where"
+                + " category='simple' | spath input=userData ] | fields a, c, *");
+    verifySchema(
+        result,
+        schema("a", "string"),
+        schema("c", "string"),
+        schema("b", "string"),
+        schema("category", "string"),
+        schema("userData", "string"));
+    verifyDataRows(
+        result,
+        rows("1", "3", "2", "simple", sj("{'a': 1, 'b': 2, 'c': 3}")),
+        rows("1", "3", "2", "simple", sj("{'a': 1, 'b': 2, 'c': 3}")));
+  }
+
+  @Test
+  public void testAppendPipeWithSpathInMain() throws IOException {
+    JSONObject result =
+        executeQuery(
+            "source=test_json | where category='simple' | spath input=userData | stats sum(a) as"
+                + " total by b | appendpipe [stats sum(total) as total] | head 5");
+    verifySchema(result, schema("total", "double"), schema("b", "string"));
+    verifyDataRows(result, rows(2, "2"), rows(2, null));
+  }
+
+  @Test
+  public void testMultisearchWithSpath() throws IOException {
+    JSONObject result =
+        executeQuery(
+            "| multisearch [source=test_json | where category='simple' | spath input=userData |"
+                + " head 1] [source=test_json | where category='nested' | spath input=userData] |"
+                + " fields a, c, *");
+    verifySchema(
+        result,
+        schema("a", "string"),
+        schema("c", "string"),
+        schema("b", "string"),
+        schema("category", "string"),
+        schema("nested.d{}", "string"),
+        schema("nested.e", "string"),
+        schema("userData", "string"));
+    verifyDataRows(
+        result,
+        rows("1", "3", "2", "simple", null, null, sj("{'a': 1, 'b': 2, 'c': 3}")),
+        rows(
+            null,
+            null,
+            null,
+            "nested",
+            "[1, 2, 3]",
+            "str",
+            sj("{'nested': {'d': [1, 2, 3], 'e': 'str'}}")));
+  }
+
+  @Test
+  public void testSpathWithMvCombine() throws IOException {
+    JSONObject result =
+        executeQuery(
+            "source=test_json | where category='simple' "
+                + "| spath input=userData "
+                + "| fields a, b, c "
+                + "| mvcombine c");
+
+    verifySchema(result, schema("a", "string"), schema("b", "string"), schema("c", "array"));
+
+    verifyDataRows(result, rows("1", "2", new String[] {"3", "3"}));
+  }
 }
diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteTransposeCommandIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteTransposeCommandIT.java
new file mode 100644
index 00000000000..44df58b7ab8
--- /dev/null
+++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteTransposeCommandIT.java
@@ -0,0 +1,173 @@
+/*
+ * Copyright OpenSearch Contributors
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+package org.opensearch.sql.calcite.remote;
+
+import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_ACCOUNT;
+import static org.opensearch.sql.util.MatcherUtils.*;
+
+import java.io.IOException;
+import org.junit.jupiter.api.Test;
+import org.opensearch.sql.ppl.PPLIntegTestCase;
+
+public class CalciteTransposeCommandIT extends PPLIntegTestCase {
+
+  @Override
+  public void init() throws Exception {
+    super.init();
+    enableCalcite();
+    loadIndex(Index.ACCOUNT);
+    loadIndex(Index.BANK);
+  }
+
+  /**
+   * Default test without parameters on the account index.
+   *
+   * @throws IOException
+   */
+  @Test
+  public void testTranspose() throws IOException {
+    var result =
+        executeQuery(
+            String.format(
+                "source=%s | head 5 | fields firstname, age, balance | transpose",
+                TEST_INDEX_ACCOUNT));
+
+    // Each selected field becomes an output row; the five input rows become "row N" columns
+    verifySchema(
+        result,
+        schema("column", "string"),
+        schema("row 1", "string"),
+        schema("row 2", "string"),
+        schema("row 3", "string"),
+        schema("row 4", "string"),
+        schema("row 5", "string"));
+
+    // One data row per transposed field, with all values rendered as strings
+    verifyDataRows(
+        result,
+        rows("firstname", "Amber", "Hattie", "Nanette", "Dale", "Elinor"),
+        rows("balance", "39225", "5686", "32838", "4180", "16418"),
+        rows("age", "32", "36", "28", "33", "36"));
+  }
+
+  @Test
+  public void testTransposeLimit() throws IOException {
+    var result =
+        executeQuery(
+            String.format(
+                "source=%s | head 10 | fields firstname , age, balance | transpose 14",
+                TEST_INDEX_ACCOUNT));
+
+    // The limit of 14 defines the output schema even though only 10 rows exist
+    verifySchema(
+        result,
+        schema("column", "string"),
+        schema("row 1", "string"),
+        schema("row 2", "string"),
+        schema("row 3", "string"),
+        schema("row 4", "string"),
+        schema("row 5", "string"),
+        schema("row 6", "string"),
+        schema("row 7", "string"),
+        schema("row 8", "string"),
+        schema("row 9", "string"),
+        schema("row 10", "string"),
+        schema("row 11", "string"),
+        schema("row 12", "string"),
+        schema("row 13", "string"),
+        schema("row 14", "string"));
+
+    // Only ten input rows exist, so columns row 11 through row 14 are padded with nulls
+    verifyDataRows(
+        result,
+        rows(
+            "firstname",
+            "Amber",
+            "Hattie",
+            "Nanette",
+            "Dale",
+            "Elinor",
+            "Virginia",
+            "Dillard",
+            "Mcgee",
+            "Aurelia",
+            "Fulton",
+            null,
+            null,
+            null,
+            null),
+        rows(
+            "balance", "39225", "5686", "32838", "4180", "16418", "40540", "48086", "18612",
+            "34487", "29104", null, null, null, null),
+        rows(
+            "age", "32", "36", "28", "33", "36", "39", "34", "39", "37", "23", null, null, null,
+            null));
+  }
+
+  @Test
+  public void testTransposeLowerLimit() throws IOException {
+    var result =
+        executeQuery(
+            String.format(
+                "source=%s | head 15 | fields firstname , age, balance | transpose 5",
+                TEST_INDEX_ACCOUNT));
+
+    // transpose 5 keeps only the first five of the fifteen input rows
+    verifySchema(
+        result,
+        schema("column", "string"),
+        schema("row 1", "string"),
+        schema("row 2", "string"),
+        schema("row 3", "string"),
+        schema("row 4", "string"),
+        schema("row 5", "string"));
+
+    verifyDataRows(
+        result,
+        rows("firstname", "Amber", "Hattie", "Nanette", "Dale", "Elinor"),
+        rows("balance", "39225", "5686", "32838", "4180", "16418"),
+        rows("age", "32", "36", "28", "33", "36"));
+  }
+
+  @Test
+  public void testTransposeColumnName() throws IOException {
+    var result =
+        executeQuery(
+            String.format(
+                "source=%s | head 5 | fields firstname, age, balance | transpose 5"
+                    + " column_name='column_names'",
+                TEST_INDEX_ACCOUNT));
+
+    // column_name renames the leading field-name column
+    verifySchema(
+        result,
+        schema("column_names", "string"),
+        schema("row 1", "string"),
+        schema("row 2", "string"),
+        schema("row 3", "string"),
+        schema("row 4", "string"),
+        schema("row 5", "string"));
+
+    verifyDataRows(
+        result,
+        rows("firstname", "Amber", "Hattie", "Nanette", "Dale", "Elinor"),
+        rows("balance", "39225", "5686", "32838", "4180", "16418"),
+        rows("age", "32", "36", "28", "33", "36"));
+  }
+}
diff --git a/integ-test/src/test/java/org/opensearch/sql/legacy/OpenSearchSQLRestTestCase.java b/integ-test/src/test/java/org/opensearch/sql/legacy/OpenSearchSQLRestTestCase.java
index 91cf5dc943c..91584fb45cf 100644
--- a/integ-test/src/test/java/org/opensearch/sql/legacy/OpenSearchSQLRestTestCase.java
+++ b/integ-test/src/test/java/org/opensearch/sql/legacy/OpenSearchSQLRestTestCase.java
@@ -311,7 +311,9 @@ protected static void configureHttpsClient( * cluster. */ public void configureMultiClusters(String remote) throws IOException { - initRemoteClient(remote); + if (remoteClient == null) { + initRemoteClient(remote); + } Request connectionRequest = new Request("PUT", "_cluster/settings"); String connectionSetting = diff --git a/integ-test/src/test/java/org/opensearch/sql/legacy/SQLIntegTestCase.java b/integ-test/src/test/java/org/opensearch/sql/legacy/SQLIntegTestCase.java index d9b76f757f9..c9de7a584c6 100644 --- a/integ-test/src/test/java/org/opensearch/sql/legacy/SQLIntegTestCase.java +++ b/integ-test/src/test/java/org/opensearch/sql/legacy/SQLIntegTestCase.java @@ -210,19 +210,6 @@ protected synchronized void loadIndex(Index index, RestClient client) throws IOE createIndexByRestClient(client, indexName, mapping); loadDataByRestClient(client, indexName, dataSet); } - // loadIndex() could directly return when isIndexExist()=true, - // e.g. the index is created in the cluster but data hasn't been flushed. - // We block loadIndex() until data loaded to resolve - // https://github.com/opensearch-project/sql/issues/4261 - int countDown = 3; // 1500ms timeout - while (countDown != 0 && getDocCount(client, indexName) == 0) { - try { - Thread.sleep(500); - countDown--; - } catch (InterruptedException e) { - throw new IOException(e); - } - } } protected synchronized void loadIndex(Index index) throws IOException { @@ -926,6 +913,11 @@ public enum Index { "time_data", getMappingFile("time_test_data_index_mapping.json"), "src/test/resources/time_test_data.json"), + MVCOMBINE( + "test_index_mvcombine", + "_doc", + getMappingFile("mvcombine_index_mapping.json"), + "src/test/resources/mvcombine.json"), TIME_TEST_DATA_WITH_NULL( TestsConstants.TEST_INDEX_TIME_DATE_NULL, "time_data_with_null", diff --git a/integ-test/src/test/java/org/opensearch/sql/legacy/TestUtils.java b/integ-test/src/test/java/org/opensearch/sql/legacy/TestUtils.java index 2ac1763836e..aa8b52af4aa 100644 --- a/integ-test/src/test/java/org/opensearch/sql/legacy/TestUtils.java +++ b/integ-test/src/test/java/org/opensearch/sql/legacy/TestUtils.java @@ -46,12 +46,29 @@ public class TestUtils { */ public static void createIndexByRestClient(RestClient client, String indexName, String mapping) { Request request = new Request("PUT", "/" + indexName); - if (!isNullOrEmpty(mapping)) { - request.setJsonEntity(mapping); - } + JSONObject jsonObject = isNullOrEmpty(mapping) ? new JSONObject() : new JSONObject(mapping); + setZeroReplicas(jsonObject); + request.setJsonEntity(jsonObject.toString()); performRequest(client, request); } + /** + * Sets number_of_replicas to 0 in the index settings. This makes multi-node behavior consistent + * (4261) and prevents tests + * from hanging on single-node clusters when using wait_for_active_shards=all. + * + * @param jsonObject the index creation JSON object to modify + */ + private static void setZeroReplicas(JSONObject jsonObject) { + JSONObject settings = + jsonObject.has("settings") ? jsonObject.getJSONObject("settings") : new JSONObject(); + JSONObject indexSettings = + settings.has("index") ? settings.getJSONObject("index") : new JSONObject(); + indexSettings.put("number_of_replicas", 0); + settings.put("index", indexSettings); + jsonObject.put("settings", settings); + } + /** * https://github.com/elastic/elasticsearch/pull/49959
* Deprecate creation of dot-prefixed index names except for hidden and system indices. Create @@ -99,7 +116,8 @@ public static boolean isIndexExist(RestClient client, String indexName) { public static void loadDataByRestClient( RestClient client, String indexName, String dataSetFilePath) throws IOException { Path path = Paths.get(getResourceFilePath(dataSetFilePath)); - Request request = new Request("POST", "/" + indexName + "/_bulk?refresh=true"); + Request request = + new Request("POST", "/" + indexName + "/_bulk?refresh=wait_for&wait_for_active_shards=all"); request.setJsonEntity(new String(Files.readAllBytes(path))); performRequest(client, request); } diff --git a/integ-test/src/test/java/org/opensearch/sql/ppl/NewAddedCommandsIT.java b/integ-test/src/test/java/org/opensearch/sql/ppl/NewAddedCommandsIT.java index 15f3c508b14..c5a1d08c37b 100644 --- a/integ-test/src/test/java/org/opensearch/sql/ppl/NewAddedCommandsIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/ppl/NewAddedCommandsIT.java @@ -202,6 +202,30 @@ public void testAddColTotalCommand() throws IOException { } } + @Test + public void testTransposeCommand() throws IOException { + JSONObject result; + try { + executeQuery(String.format("search source=%s | transpose ", TEST_INDEX_BANK)); + } catch (ResponseException e) { + result = new JSONObject(TestUtils.getResponseBody(e.getResponse())); + verifyQuery(result); + } + } + + @Test + public void testFieldFormatCommand() throws IOException { + JSONObject result; + try { + executeQuery( + String.format( + "search source=%s | fieldformat double_balance = balance * 2 ", TEST_INDEX_BANK)); + } catch (ResponseException e) { + result = new JSONObject(TestUtils.getResponseBody(e.getResponse())); + verifyQuery(result); + } + } + private void verifyQuery(JSONObject result) throws IOException { if (isCalciteEnabled()) { assertFalse(result.getJSONArray("datarows").isEmpty()); @@ -214,4 +238,18 @@ private void verifyQuery(JSONObject result) throws IOException { assertThat(error.getString("type"), equalTo("UnsupportedOperationException")); } } + + @Test + public void testMvCombineUnsupportedInV2() throws IOException { + JSONObject result; + try { + result = + executeQuery( + String.format( + "source=%s | fields state, city, age | mvcombine age", TEST_INDEX_BANK)); + } catch (ResponseException e) { + result = new JSONObject(TestUtils.getResponseBody(e.getResponse())); + } + verifyQuery(result); + } } diff --git a/integ-test/src/test/java/org/opensearch/sql/security/CalciteCrossClusterSearchIT.java b/integ-test/src/test/java/org/opensearch/sql/security/CalciteCrossClusterSearchIT.java index 1cbd019eca3..571d915517e 100644 --- a/integ-test/src/test/java/org/opensearch/sql/security/CalciteCrossClusterSearchIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/security/CalciteCrossClusterSearchIT.java @@ -5,9 +5,6 @@ package org.opensearch.sql.security; -import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_ACCOUNT; -import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_BANK; -import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_DOG; import static org.opensearch.sql.util.MatcherUtils.columnName; import static org.opensearch.sql.util.MatcherUtils.rows; import static org.opensearch.sql.util.MatcherUtils.schema; @@ -16,51 +13,22 @@ import static org.opensearch.sql.util.MatcherUtils.verifySchema; import java.io.IOException; -import lombok.SneakyThrows; +import org.apache.commons.text.StringEscapeUtils; import org.json.JSONObject; -import 
org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Test; -import org.opensearch.sql.ppl.PPLIntegTestCase; +import org.junit.Test; /** Cross Cluster Search tests with Calcite enabled for enhanced fields features. */ -public class CalciteCrossClusterSearchIT extends PPLIntegTestCase { - - static { - String[] clusterNames = System.getProperty("cluster.names").split(","); - var remote = "remoteCluster"; - for (var cluster : clusterNames) { - if (cluster.startsWith("remote")) { - remote = cluster; - break; - } - } - REMOTE_CLUSTER = remote; - } - - public static final String REMOTE_CLUSTER; - private static final String TEST_INDEX_ACCOUNT_REMOTE = REMOTE_CLUSTER + ":" + TEST_INDEX_ACCOUNT; - private static final String TEST_INDEX_DOG_REMOTE = REMOTE_CLUSTER + ":" + TEST_INDEX_DOG; - private static final String TEST_INDEX_BANK_REMOTE = REMOTE_CLUSTER + ":" + TEST_INDEX_BANK; - private static boolean initialized = false; - - @SneakyThrows - @BeforeEach - public void initialize() { - if (!initialized) { - setUpIndices(); - initialized = true; - } - } +public class CalciteCrossClusterSearchIT extends CrossClusterTestBase { @Override protected void init() throws Exception { - configureMultiClusters(REMOTE_CLUSTER); + super.init(); loadIndex(Index.BANK); loadIndex(Index.BANK, remoteClient()); - loadIndex(Index.ACCOUNT); - loadIndex(Index.ACCOUNT, remoteClient()); loadIndex(Index.DOG); loadIndex(Index.DOG, remoteClient()); + loadIndex(Index.ACCOUNT); + loadIndex(Index.ACCOUNT, remoteClient()); loadIndex(Index.TIME_TEST_DATA); loadIndex(Index.TIME_TEST_DATA, remoteClient()); enableCalcite(); @@ -87,8 +55,8 @@ public void testCrossClusterFieldsWildcardPrefix() throws IOException { public void testCrossClusterFieldsWildcardSuffix() throws IOException { JSONObject result = executeQuery(String.format("search source=%s | fields *Name", TEST_INDEX_DOG_REMOTE)); - verifyColumn(result, columnName("dog_name"), columnName("holdersName")); - verifySchema(result, schema("dog_name", "string"), schema("holdersName", "string")); + verifyColumn(result, columnName("holdersName")); + verifySchema(result, schema("holdersName", "string")); } @Test @@ -165,7 +133,7 @@ public void testDefaultBinCrossCluster() throws IOException { TEST_INDEX_ACCOUNT_REMOTE)); verifySchema(result, schema("count()", null, "bigint"), schema("age", null, "string")); - verifyDataRows(result, rows(451L, "20-30"), rows(504L, "30-40"), rows(45L, "40-50")); + verifyDataRows(result, rows(451, "20.0-30.0"), rows(504L, "30.0-40.0"), rows(45L, "40.0-50.0")); } @Test @@ -218,7 +186,7 @@ public void testRangeBinCrossCluster() throws IOException { TEST_INDEX_ACCOUNT_REMOTE)); verifySchema(result, schema("count()", null, "bigint"), schema("age", null, "string")); - verifyDataRows(result, rows(1000L, "0-100")); + verifyDataRows(result, rows(451, "20-30"), rows(504L, "30-40"), rows(45L, "40-50")); } @Test @@ -226,10 +194,10 @@ public void testTimeBinCrossCluster() throws IOException { // Time-based binning with span JSONObject result = executeQuery( - REMOTE_CLUSTER - + ":opensearch-sql_test_index_time_data" - + " | bin @timestamp span=1h" - + " | fields `@timestamp`, value | sort `@timestamp` | head 3"); + String.format( + "source=%s | bin @timestamp span=1h | fields `@timestamp`, value | sort" + + " `@timestamp` | head 3", + TEST_INDEX_TIME_DATA_REMOTE)); verifySchema(result, schema("@timestamp", null, "timestamp"), schema("value", null, "int")); // With 1-hour spans @@ -285,12 +253,27 @@ public void testCrossClusterRenameFullWildcard() throws 
IOException { JSONObject result = executeQuery(String.format("search source=%s | rename * as old_*", TEST_INDEX_DOG_REMOTE)); verifyColumn( - result, columnName("old_dog_name"), columnName("old_holdersName"), columnName("old_age")); + result, + columnName("old_dog_name"), + columnName("old_holdersName"), + columnName("old_age"), + columnName("old__id"), + columnName("old__index"), + columnName("old__score"), + columnName("old__maxscore"), + columnName("old__sort"), + columnName("old__routing")); verifySchema( result, schema("old_dog_name", "string"), schema("old_holdersName", "string"), - schema("old_age", "bigint")); + schema("old_age", "bigint"), + schema("old__id", "string"), + schema("old__index", "string"), + schema("old__score", "float"), + schema("old__maxscore", "float"), + schema("old__sort", "bigint"), + schema("old__routing", "string")); } @Test @@ -349,4 +332,90 @@ public void testCrossClusterRexWithOffsetField() throws IOException { verifyDataRows( result, rows("Duke Willmington", "u", "vowel=1-1"), rows("Bond", "o", "vowel=1-1")); } + + @Test + public void testCrossClusterAddTotals() throws IOException { + JSONObject result = + executeQuery( + String.format( + "search source=%s| sort 1 age | fields firstname, age | addtotals age", + TEST_INDEX_BANK_REMOTE)); + verifyDataRows(result, rows("Nanette", 28, 28)); + } + + /** CrossClusterSearchIT Test for addcoltotals. */ + @Test + public void testCrossClusterAddColTotals() throws IOException { + JSONObject result = + executeQuery( + String.format( + "search source=%s | where firstname='Hattie' or firstname ='Nanette'|fields" + + " firstname,age,balance | addcoltotals age balance", + TEST_INDEX_BANK_REMOTE)); + verifyDataRows( + result, rows("Hattie", 36, 5686), rows("Nanette", 28, 32838), rows(null, 64, 38524)); + } + + @Test + public void testCrossClusterTranspose() throws IOException { + JSONObject result = + executeQuery( + String.format( + "search source=%s | where firstname='Hattie' or firstname ='Nanette' or" + + " firstname='Dale'|sort firstname desc |fields firstname,age,balance |" + + " transpose 3 column_name='column_names'", + TEST_INDEX_BANK_REMOTE)); + + verifyDataRows( + result, + rows("firstname", "Nanette", "Hattie", "Dale"), + rows("balance", "32838", "5686", "4180"), + rows("age", "28", "36", "33")); + } + + @Test + public void testCrossClusterAppend() throws IOException { + JSONObject result = + executeQuery( + String.format( + "search source=%s | stats count() as cnt by gender | append [ search source=%s |" + + " stats count() as cnt ]", + TEST_INDEX_BANK_REMOTE, TEST_INDEX_BANK_REMOTE)); + verifyDataRows(result, rows(3, "F"), rows(4, "M"), rows(7, null)); + } + + /** CrossClusterSearchIT Test for mvcombine. */ + @Test + public void testCrossClusterMvcombine() throws IOException { + + JSONObject result = + executeQuery( + String.format( + "search source=%s | where firstname='Hattie' or firstname='Nanette' " + + "| fields firstname, age | mvcombine age", + TEST_INDEX_BANK_REMOTE)); + + verifyColumn(result, columnName("firstname"), columnName("age")); + + verifyDataRows( + result, + rows("Hattie", new org.json.JSONArray().put(36)), + rows("Nanette", new org.json.JSONArray().put(28))); + } + + /** CrossClusterSearchIT Test for fieldformat. 
*/ + @Test + public void testCrossClusterFieldFormat() throws IOException { + // Test fieldformat command with tostring + JSONObject result = + executeQuery( + StringEscapeUtils.escapeJson( + String.format( + "search source=%s | where firstname='Hattie' or firstname ='Nanette'|fields" + + " firstname,age,balance | fieldformat formatted_balance =" + + " \"$\".tostring(balance,\"commas\")", + TEST_INDEX_BANK_REMOTE))); + verifyDataRows( + result, rows("Hattie", 36, 5686, "$5,686"), rows("Nanette", 28, 32838, "$32,838")); + } } diff --git a/integ-test/src/test/java/org/opensearch/sql/security/CrossClusterCoalesceIT.java b/integ-test/src/test/java/org/opensearch/sql/security/CrossClusterCoalesceIT.java index 530dccd5287..74d6be236dc 100644 --- a/integ-test/src/test/java/org/opensearch/sql/security/CrossClusterCoalesceIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/security/CrossClusterCoalesceIT.java @@ -5,50 +5,21 @@ package org.opensearch.sql.security; -import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_DOG; import static org.opensearch.sql.util.MatcherUtils.columnName; import static org.opensearch.sql.util.MatcherUtils.verifyColumn; import java.io.IOException; -import lombok.SneakyThrows; import org.json.JSONObject; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Test; -import org.opensearch.sql.ppl.PPLIntegTestCase; +import org.junit.Test; -public class CrossClusterCoalesceIT extends PPLIntegTestCase { - - static { - String[] clusterNames = System.getProperty("cluster.names").split(","); - var remote = "remoteCluster"; - for (var cluster : clusterNames) { - if (cluster.startsWith("remote")) { - remote = cluster; - break; - } - } - REMOTE_CLUSTER = remote; - } - - public static final String REMOTE_CLUSTER; - private static final String TEST_INDEX_DOG_REMOTE = REMOTE_CLUSTER + ":" + TEST_INDEX_DOG; - private static boolean initialized = false; - - @SneakyThrows - @BeforeEach - public void initialize() { - if (!initialized) { - setUpIndices(); - initialized = true; - } - } +public class CrossClusterCoalesceIT extends CrossClusterTestBase { @Override protected void init() throws Exception { - enableCalcite(); - configureMultiClusters(REMOTE_CLUSTER); + super.init(); loadIndex(Index.DOG); loadIndex(Index.DOG, remoteClient()); + enableCalcite(); } @Test diff --git a/integ-test/src/test/java/org/opensearch/sql/security/CrossClusterSearchIT.java b/integ-test/src/test/java/org/opensearch/sql/security/CrossClusterSearchIT.java index 7ee90dc4640..abdbb616e2c 100644 --- a/integ-test/src/test/java/org/opensearch/sql/security/CrossClusterSearchIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/security/CrossClusterSearchIT.java @@ -5,60 +5,23 @@ package org.opensearch.sql.security; -import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_ACCOUNT; import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_BANK; -import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_DOG; import static org.opensearch.sql.util.MatcherUtils.columnName; import static org.opensearch.sql.util.MatcherUtils.rows; import static org.opensearch.sql.util.MatcherUtils.verifyColumn; import static org.opensearch.sql.util.MatcherUtils.verifyDataRows; import java.io.IOException; -import lombok.SneakyThrows; import org.json.JSONObject; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Test; +import org.junit.Test; import org.opensearch.client.ResponseException; -import org.opensearch.sql.ppl.PPLIntegTestCase; /** Cross Cluster Search 
tests to be executed with security plugin. */ -public class CrossClusterSearchIT extends PPLIntegTestCase { - - static { - // find a remote cluster - String[] clusterNames = System.getProperty("cluster.names").split(","); - var remote = "remoteCluster"; - for (var cluster : clusterNames) { - if (cluster.startsWith("remote")) { - remote = cluster; - break; - } - } - REMOTE_CLUSTER = remote; - } - - public static final String REMOTE_CLUSTER; - - private static final String TEST_INDEX_BANK_REMOTE = REMOTE_CLUSTER + ":" + TEST_INDEX_BANK; - private static final String TEST_INDEX_DOG_REMOTE = REMOTE_CLUSTER + ":" + TEST_INDEX_DOG; - private static final String TEST_INDEX_DOG_MATCH_ALL_REMOTE = - MATCH_ALL_REMOTE_CLUSTER + ":" + TEST_INDEX_DOG; - private static final String TEST_INDEX_ACCOUNT_REMOTE = REMOTE_CLUSTER + ":" + TEST_INDEX_ACCOUNT; - - private static boolean initialized = false; - - @SneakyThrows - @BeforeEach - public void initialize() { - if (!initialized) { - setUpIndices(); - initialized = true; - } - } +public class CrossClusterSearchIT extends CrossClusterTestBase { @Override protected void init() throws Exception { - configureMultiClusters(REMOTE_CLUSTER); + super.init(); loadIndex(Index.BANK); loadIndex(Index.BANK, remoteClient()); loadIndex(Index.DOG); @@ -246,45 +209,4 @@ public void testCrossClusterQueryStringWithoutFields() throws IOException { TEST_INDEX_BANK_REMOTE)); verifyDataRows(result, rows("Hattie")); } - - @Test - public void testCrossClusterAddTotals() throws IOException { - // Test query_string without fields parameter on remote cluster - JSONObject result = - executeQuery( - String.format( - "search source=%s| sort 1 age | fields firstname, age | addtotals age", - TEST_INDEX_BANK_REMOTE)); - verifyDataRows(result, rows("Nanette", 28, 28)); - } - - /** CrossClusterSearchIT Test for addcoltotals. */ - @Test - public void testCrossClusterAddColTotals() throws IOException { - // Test query_string without fields parameter on remote cluster - JSONObject result = - executeQuery( - String.format( - "search source=%s | where firstname='Hattie' or firstname ='Nanette'|fields" - + " firstname,age,balance | addcoltotals age balance", - TEST_INDEX_BANK_REMOTE)); - verifyDataRows( - result, rows("Hattie", 36, 5686), rows("Nanette", 28, 32838), rows(null, 64, 38524)); - } - - @Test - public void testCrossClusterAppend() throws IOException { - // TODO: We should enable calcite by default in CrossClusterSearchIT? 
- enableCalcite(); - - JSONObject result = - executeQuery( - String.format( - "search source=%s | stats count() as cnt by gender | append [ search source=%s |" - + " stats count() as cnt ]", - TEST_INDEX_BANK_REMOTE, TEST_INDEX_BANK_REMOTE)); - verifyDataRows(result, rows(3, "F"), rows(4, "M"), rows(7, null)); - - disableCalcite(); - } } diff --git a/integ-test/src/test/java/org/opensearch/sql/security/CrossClusterTestBase.java b/integ-test/src/test/java/org/opensearch/sql/security/CrossClusterTestBase.java new file mode 100644 index 00000000000..d9de95c663b --- /dev/null +++ b/integ-test/src/test/java/org/opensearch/sql/security/CrossClusterTestBase.java @@ -0,0 +1,45 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.security; + +import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_ACCOUNT; +import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_BANK; +import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_DOG; +import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_TIME_DATA; + +import org.opensearch.sql.ppl.PPLIntegTestCase; + +public class CrossClusterTestBase extends PPLIntegTestCase { + static { + // find a remote cluster + String[] clusterNames = System.getProperty("cluster.names").split(","); + var remote = "remoteCluster"; + for (var cluster : clusterNames) { + if (cluster.startsWith("remote")) { + remote = cluster; + break; + } + } + REMOTE_CLUSTER = remote; + } + + public static final String REMOTE_CLUSTER; + + protected static final String TEST_INDEX_BANK_REMOTE = REMOTE_CLUSTER + ":" + TEST_INDEX_BANK; + protected static final String TEST_INDEX_DOG_REMOTE = REMOTE_CLUSTER + ":" + TEST_INDEX_DOG; + protected static final String TEST_INDEX_DOG_MATCH_ALL_REMOTE = + MATCH_ALL_REMOTE_CLUSTER + ":" + TEST_INDEX_DOG; + protected static final String TEST_INDEX_ACCOUNT_REMOTE = + REMOTE_CLUSTER + ":" + TEST_INDEX_ACCOUNT; + protected static final String TEST_INDEX_TIME_DATA_REMOTE = + REMOTE_CLUSTER + ":" + TEST_INDEX_TIME_DATA; + + @Override + protected void init() throws Exception { + super.init(); + configureMultiClusters(REMOTE_CLUSTER); + } +} diff --git a/integ-test/src/test/java/org/opensearch/sql/security/PPLPermissionsIT.java b/integ-test/src/test/java/org/opensearch/sql/security/PPLPermissionsIT.java index 4664491b686..d84ca2af1b3 100644 --- a/integ-test/src/test/java/org/opensearch/sql/security/PPLPermissionsIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/security/PPLPermissionsIT.java @@ -14,10 +14,8 @@ import java.io.IOException; import java.util.Locale; -import lombok.SneakyThrows; import org.json.JSONObject; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Test; +import org.junit.Test; import org.opensearch.client.Request; import org.opensearch.client.RequestOptions; import org.opensearch.client.Response; @@ -59,21 +57,12 @@ public class PPLPermissionsIT extends PPLIntegTestCase { private static final String NO_PIT_USER = "no_pit_user"; private static final String NO_PIT_ROLE = "no_pit_role"; - private static boolean initialized = false; - - @SneakyThrows - @BeforeEach - public void initialize() { - if (!initialized) { - setUpIndices(); - createSecurityRolesAndUsers(); - initialized = true; - } - } + private boolean initialized = false; @Override protected void init() throws Exception { super.init(); + createSecurityRolesAndUsers(); loadIndex(Index.BANK); loadIndex(Index.DOG); // Enable Calcite engine to test PIT behavior 
with Calcite @@ -87,24 +76,27 @@ protected void init() throws Exception { * access to their specific index. */ private void createSecurityRolesAndUsers() throws IOException { - // Create role for bank index access - createRole(BANK_ROLE, TEST_INDEX_BANK); + if (!initialized) { + // Create role for bank index access + createRole(BANK_ROLE, TEST_INDEX_BANK); - // Create role for dog index access - createRole(DOG_ROLE, TEST_INDEX_DOG); + // Create role for dog index access + createRole(DOG_ROLE, TEST_INDEX_DOG); - // Create users and map them to roles - createUser(BANK_USER, BANK_ROLE); - createUser(DOG_USER, DOG_ROLE); + // Create users and map them to roles + createUser(BANK_USER, BANK_ROLE); + createUser(DOG_USER, DOG_ROLE); - // Create roles for testing missing permissions - createRoleWithMissingPermissions(); + // Create roles for testing missing permissions + createRoleWithMissingPermissions(); - // Create user with minimal permissions for plugin-based PIT testing - createMinimalUserForPitTesting(); + // Create user with minimal permissions for plugin-based PIT testing + createMinimalUserForPitTesting(); - // Create user without PIT permissions to test PIT requirement - createNoPitUserForTesting(); + // Create user without PIT permissions to test PIT requirement + createNoPitUserForTesting(); + initialized = true; + } } private void createRole(String roleName, String indexPattern) throws IOException { diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_field_format.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_field_format.yaml new file mode 100644 index 00000000000..202f594bef0 --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_field_format.yaml @@ -0,0 +1,10 @@ +calcite: + logical: | + LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], formatted_balance=[||('$':VARCHAR, TOSTRING($3, 'commas':VARCHAR))]) + LogicalSort(fetch=[5]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) + physical: | + EnumerableCalc(expr#0..10=[{inputs}], expr#11=['$':VARCHAR], expr#12=['commas':VARCHAR], expr#13=[TOSTRING($t3, $t12)], expr#14=[||($t11, $t13)], proj#0..10=[{exprs}], formatted_balance=[$t14]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[account_number, firstname, address, balance, gender, city, employer, state, age, email, lastname], LIMIT->5, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":5,"timeout":"1m","_source":{"includes":["account_number","firstname","address","balance","gender","city","employer","state","age","email","lastname"],"excludes":[]}}, requestedTotalSize=5, pageSize=null, startFrom=0)]) + diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_mvcombine.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_mvcombine.yaml new file mode 100644 index 00000000000..ff54f066772 --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_mvcombine.yaml @@ -0,0 +1,12 @@ +calcite: + logical: | + LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(state=[$0], city=[$1], age=[$2]) + LogicalAggregate(group=[{0, 1}], age=[ARRAY_AGG($2) FILTER $3]) + LogicalProject(state=[$7], city=[$5], age=[$8], $f3=[IS NOT NULL($8)]) + 
CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) + physical: | + EnumerableLimit(fetch=[10000]) + EnumerableAggregate(group=[{0, 1}], age=[ARRAY_AGG($2) FILTER $3]) + EnumerableCalc(expr#0..2=[{inputs}], expr#3=[IS NOT NULL($t2)], proj#0..3=[{exprs}]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[state, city, age]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"timeout":"1m","_source":{"includes":["state","city","age"],"excludes":[]}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_transpose.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_transpose.yaml new file mode 100644 index 00000000000..80409e6f717 --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_transpose.yaml @@ -0,0 +1,22 @@ +calcite: + logical: | + LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(column_names=[$0], row 1=[$1], row 2=[$2], row 3=[$3], row 4=[$4]) + LogicalAggregate(group=[{1}], row 1_null=[MAX($0) FILTER $2], row 2_null=[MAX($0) FILTER $3], row 3_null=[MAX($0) FILTER $4], row 4_null=[MAX($0) FILTER $5]) + LogicalProject(value=[CAST($19):VARCHAR NOT NULL], $f20=[TRIM(FLAG(BOTH), ' ', $18)], $f21=[=($17, 1)], $f22=[=($17, 2)], $f23=[=($17, 3)], $f24=[=($17, 4)]) + LogicalFilter(condition=[IS NOT NULL($19)]) + LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], _id=[$11], _index=[$12], _score=[$13], _maxscore=[$14], _sort=[$15], _routing=[$16], _row_number_transpose_=[$17], column_names=[$18], value=[CASE(=($18, 'account_number'), CAST($0):VARCHAR NOT NULL, =($18, 'firstname'), CAST($1):VARCHAR NOT NULL, =($18, 'address'), CAST($2):VARCHAR NOT NULL, =($18, 'balance'), CAST($3):VARCHAR NOT NULL, =($18, 'gender'), CAST($4):VARCHAR NOT NULL, =($18, 'city'), CAST($5):VARCHAR NOT NULL, =($18, 'employer'), CAST($6):VARCHAR NOT NULL, =($18, 'state'), CAST($7):VARCHAR NOT NULL, =($18, 'age'), CAST($8):VARCHAR NOT NULL, =($18, 'email'), CAST($9):VARCHAR NOT NULL, =($18, 'lastname'), CAST($10):VARCHAR NOT NULL, null:NULL)]) + LogicalJoin(condition=[true], joinType=[inner]) + LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], _id=[$11], _index=[$12], _score=[$13], _maxscore=[$14], _sort=[$15], _routing=[$16], _row_number_transpose_=[ROW_NUMBER() OVER ()]) + LogicalSort(fetch=[5]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) + LogicalValues(tuples=[[{ 'account_number' }, { 'firstname' }, { 'address' }, { 'balance' }, { 'gender' }, { 'city' }, { 'employer' }, { 'state' }, { 'age' }, { 'email' }, { 'lastname' }]]) + physical: | + EnumerableLimit(fetch=[10000]) + EnumerableAggregate(group=[{1}], row 1_null=[MAX($0) FILTER $2], row 2_null=[MAX($0) FILTER $3], row 3_null=[MAX($0) FILTER $4], row 4_null=[MAX($0) FILTER $5]) + EnumerableCalc(expr#0..12=[{inputs}], expr#13=['account_number'], expr#14=[=($t12, $t13)], expr#15=[CAST($t0):VARCHAR NOT NULL], expr#16=['firstname'], expr#17=[=($t12, $t16)], expr#18=[CAST($t1):VARCHAR NOT NULL], expr#19=['address'], expr#20=[=($t12, $t19)], expr#21=[CAST($t2):VARCHAR NOT NULL], expr#22=['balance'], expr#23=[=($t12, $t22)], expr#24=[CAST($t3):VARCHAR NOT NULL], 
expr#25=['gender'], expr#26=[=($t12, $t25)], expr#27=[CAST($t4):VARCHAR NOT NULL], expr#28=['city'], expr#29=[=($t12, $t28)], expr#30=[CAST($t5):VARCHAR NOT NULL], expr#31=['employer'], expr#32=[=($t12, $t31)], expr#33=[CAST($t6):VARCHAR NOT NULL], expr#34=['state'], expr#35=[=($t12, $t34)], expr#36=[CAST($t7):VARCHAR NOT NULL], expr#37=['age'], expr#38=[=($t12, $t37)], expr#39=[CAST($t8):VARCHAR NOT NULL], expr#40=['email'], expr#41=[=($t12, $t40)], expr#42=[CAST($t9):VARCHAR NOT NULL], expr#43=['lastname'], expr#44=[=($t12, $t43)], expr#45=[CAST($t10):VARCHAR NOT NULL], expr#46=[null:NULL], expr#47=[CASE($t14, $t15, $t17, $t18, $t20, $t21, $t23, $t24, $t26, $t27, $t29, $t30, $t32, $t33, $t35, $t36, $t38, $t39, $t41, $t42, $t44, $t45, $t46)], expr#48=[CAST($t47):VARCHAR NOT NULL], expr#49=[FLAG(BOTH)], expr#50=[' '], expr#51=[TRIM($t49, $t50, $t12)], expr#52=[1], expr#53=[=($t11, $t52)], expr#54=[2], expr#55=[=($t11, $t54)], expr#56=[3], expr#57=[=($t11, $t56)], expr#58=[4], expr#59=[=($t11, $t58)], value=[$t48], $f20=[$t51], $f21=[$t53], $f22=[$t55], $f23=[$t57], $f24=[$t59]) + EnumerableNestedLoopJoin(condition=[true], joinType=[inner]) + EnumerableWindow(window#0=[window(rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[account_number, firstname, address, balance, gender, city, employer, state, age, email, lastname], LIMIT->5], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":5,"timeout":"1m","_source":{"includes":["account_number","firstname","address","balance","gender","city","employer","state","age","email","lastname"],"excludes":[]}}, requestedTotalSize=5, pageSize=null, startFrom=0)]) + EnumerableCalc(expr#0=[{inputs}], expr#1=[Sarg['account_number', 'address':CHAR(14), 'age':CHAR(14), 'balance':CHAR(14), 'city':CHAR(14), 'email':CHAR(14), 'employer':CHAR(14), 'firstname':CHAR(14), 'gender':CHAR(14), 'lastname':CHAR(14), 'state':CHAR(14)]:CHAR(14)], expr#2=[SEARCH($t0, $t1)], column_names=[$t0], $condition=[$t2]) + EnumerableValues(tuples=[[{ 'account_number' }, { 'firstname' }, { 'address' }, { 'balance' }, { 'gender' }, { 'city' }, { 'employer' }, { 'state' }, { 'age' }, { 'email' }, { 'lastname' }]]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_mvcombine.yaml b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_mvcombine.yaml new file mode 100644 index 00000000000..35f2d79e7c8 --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_mvcombine.yaml @@ -0,0 +1,12 @@ +calcite: + logical: | + LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(state=[$0], city=[$1], age=[$2]) + LogicalAggregate(group=[{0, 1}], age=[ARRAY_AGG($2) FILTER $3]) + LogicalProject(state=[$7], city=[$5], age=[$8], $f3=[IS NOT NULL($8)]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) + physical: | + EnumerableLimit(fetch=[10000]) + EnumerableAggregate(group=[{0, 1}], age=[ARRAY_AGG($2) FILTER $3]) + EnumerableCalc(expr#0..16=[{inputs}], expr#17=[IS NOT NULL($t8)], state=[$t7], city=[$t5], age=[$t8], $f3=[$t17]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) diff --git a/integ-test/src/test/resources/mvcombine.json b/integ-test/src/test/resources/mvcombine.json new file mode 100644 index 00000000000..315aafb532c --- /dev/null +++ 
b/integ-test/src/test/resources/mvcombine.json @@ -0,0 +1,42 @@ +{ "index": { "_index": "test_index_mvcombine", "_id": "1" } } +{ "ip": "10.0.0.1", "bytes": 100, "tags": "t1", "packets_str": "10" } +{ "index": { "_index": "test_index_mvcombine", "_id": "2" } } +{ "ip": "10.0.0.1", "bytes": 100, "tags": "t1", "packets_str": "20" } +{ "index": { "_index": "test_index_mvcombine", "_id": "3" } } +{ "ip": "10.0.0.1", "bytes": 100, "tags": "t1", "packets_str": "30" } + +{ "index": { "_index": "test_index_mvcombine", "_id": "4" } } +{ "ip": "10.0.0.2", "bytes": 200, "tags": "t2", "packets_str": "7" } + +{ "index": { "_index": "test_index_mvcombine", "_id": "5" } } +{ "ip": "10.0.0.3", "bytes": 300, "tags": "t3", "packets_str": "5" } +{ "index": { "_index": "test_index_mvcombine", "_id": "6" } } +{ "ip": "10.0.0.3", "bytes": 300, "tags": "t3" } +{ "index": { "_index": "test_index_mvcombine", "_id": "7" } } +{ "ip": "10.0.0.3", "bytes": 300, "tags": "t3", "letters": "a" } + +{ "index": { "_index": "test_index_mvcombine", "_id": "16" } } +{ "ip": "10.0.0.7", "bytes": 700, "tags": "t7", "packets_str": "1" } +{ "index": { "_index": "test_index_mvcombine", "_id": "17" } } +{ "ip": "10.0.0.7", "bytes": 700, "tags": "t7", "packets_str": "2" } +{ "index": { "_index": "test_index_mvcombine", "_id": "18" } } +{ "ip": "10.0.0.8", "bytes": 700, "tags": "t7", "packets_str": "9" } + +{ "index": { "_index": "test_index_mvcombine", "_id": "19" } } +{ "ip": "10.0.0.9", "bytes": 900, "tags": "t9", "packets_str": "1" } +{ "index": { "_index": "test_index_mvcombine", "_id": "20" } } +{ "ip": "10.0.0.9", "bytes": 900, "tags": "t9", "packets_str": "2" } +{ "index": { "_index": "test_index_mvcombine", "_id": "21" } } +{ "ip": "10.0.0.9", "bytes": 900, "tags": "t9", "packets_str": "3" } + +{ "index": { "_index": "test_index_mvcombine", "_id": "11" } } +{ "ip": "10.0.0.5", "bytes": 500, "tags": "t5", "packets_str": "dup" } +{ "index": { "_index": "test_index_mvcombine", "_id": "12" } } +{ "ip": "10.0.0.5", "bytes": 500, "tags": "t5", "packets_str": "dup" } +{ "index": { "_index": "test_index_mvcombine", "_id": "13" } } +{ "ip": "10.0.0.5", "bytes": 500, "tags": "t5", "packets_str": "x" } + +{ "index": { "_index": "test_index_mvcombine", "_id": "14" } } +{ "ip": "10.0.0.6", "bytes": 600, "tags": "t6", "packets_str": "" } +{ "index": { "_index": "test_index_mvcombine", "_id": "15" } } +{ "ip": "10.0.0.6", "bytes": 600, "tags": "t6", "packets_str": "z" } diff --git a/integ-test/src/test/resources/mvcombine_index_mapping.json b/integ-test/src/test/resources/mvcombine_index_mapping.json new file mode 100644 index 00000000000..0c008faf2f1 --- /dev/null +++ b/integ-test/src/test/resources/mvcombine_index_mapping.json @@ -0,0 +1,13 @@ +{ + "mappings": { + "properties": { + "case": { "type": "keyword" }, + "ip": { "type": "ip" }, + "bytes": { "type": "long" }, + "tags": { "type": "keyword" }, + + "packets_str": { "type": "keyword" }, + "letters": { "type": "keyword" } + } + } +} diff --git a/ppl/src/main/antlr/OpenSearchPPLLexer.g4 b/ppl/src/main/antlr/OpenSearchPPLLexer.g4 index 71162e81bd8..9113663e473 100644 --- a/ppl/src/main/antlr/OpenSearchPPLLexer.g4 +++ b/ppl/src/main/antlr/OpenSearchPPLLexer.g4 @@ -26,6 +26,7 @@ STREAMSTATS: 'STREAMSTATS'; DEDUP: 'DEDUP'; SORT: 'SORT'; EVAL: 'EVAL'; +FIELDFORMAT: 'FIELDFORMAT'; HEAD: 'HEAD'; BIN: 'BIN'; TOP: 'TOP'; @@ -46,6 +47,7 @@ ML: 'ML'; FILLNULL: 'FILLNULL'; FLATTEN: 'FLATTEN'; TRENDLINE: 'TRENDLINE'; +TRANSPOSE: 'TRANSPOSE'; CHART: 'CHART'; TIMECHART: 'TIMECHART'; APPENDCOL: 
'APPENDCOL'; @@ -70,6 +72,9 @@ LABEL: 'LABEL'; SHOW_NUMBERED_TOKEN: 'SHOW_NUMBERED_TOKEN'; AGGREGATION: 'AGGREGATION'; APPENDPIPE: 'APPENDPIPE'; +COLUMN_NAME: 'COLUMN_NAME'; +MVCOMBINE: 'MVCOMBINE'; + //Native JOIN KEYWORDS JOIN: 'JOIN'; diff --git a/ppl/src/main/antlr/OpenSearchPPLParser.g4 b/ppl/src/main/antlr/OpenSearchPPLParser.g4 index e4500ee1e6f..8cc4ed932d8 100644 --- a/ppl/src/main/antlr/OpenSearchPPLParser.g4 +++ b/ppl/src/main/antlr/OpenSearchPPLParser.g4 @@ -85,9 +85,12 @@ commands | regexCommand | chartCommand | timechartCommand + | transposeCommand | rexCommand | appendPipeCommand | replaceCommand + | mvcombineCommand + | fieldformatCommand ; commandName @@ -105,6 +108,7 @@ commandName | DEDUP | SORT | EVAL + | FIELDFORMAT | HEAD | BIN | TOP @@ -131,6 +135,8 @@ commandName | REX | APPENDPIPE | REPLACE + | MVCOMBINE + | TRANSPOSE ; searchCommand @@ -328,6 +334,16 @@ timechartCommand : TIMECHART timechartParameter* statsAggTerm (BY fieldExpression)? timechartParameter* ; +transposeCommand + : TRANSPOSE transposeParameter* + ; + +transposeParameter + : (number = integerLiteral) + | (COLUMN_NAME EQUAL stringLiteral) + ; + + timechartParameter : LIMIT EQUAL integerLiteral | SPAN EQUAL spanLiteral @@ -347,6 +363,10 @@ evalCommand : EVAL evalClause (COMMA evalClause)* ; +fieldformatCommand + : FIELDFORMAT fieldFormatEvalClause (COMMA fieldFormatEvalClause)* + ; + headCommand : HEAD (number = integerLiteral)? (FROM from = integerLiteral)? ; @@ -531,6 +551,10 @@ expandCommand : EXPAND fieldExpression (AS alias = qualifiedName)? ; +mvcombineCommand + : MVCOMBINE fieldExpression (DELIM EQUAL stringLiteral)? + ; + flattenCommand : FLATTEN fieldExpression (AS aliases = identifierSeq)? ; @@ -720,6 +744,10 @@ evalClause : fieldExpression EQUAL logicalExpression ; +fieldFormatEvalClause + : fieldExpression EQUAL ffLogicalExpression + ; + eventstatsAggTerm : windowFunction (AS alias = wcFieldExpression)? ; @@ -813,6 +841,13 @@ numericLiteral | floatLiteral ; +ffLogicalExpression + : stringLiteral DOT logicalExpression # stringDotlogicalExpression + | stringLiteral DOT logicalExpression DOT stringLiteral # stringDotlogicalExpressionDotString + | logicalExpression DOT stringLiteral # logicalExpressionDotString + | logicalExpression # ffStandardLogicalExpression + ; + // predicates logicalExpression : NOT logicalExpression # logicalNot @@ -830,6 +865,7 @@ expression | expression NOT? 
BETWEEN expression AND expression # between ; + valueExpression : left = valueExpression binaryOperator = (STAR | DIVIDE | MODULE) right = valueExpression # binaryArithmetic | left = valueExpression binaryOperator = (PLUS | MINUS) right = valueExpression # binaryArithmetic @@ -1657,5 +1693,6 @@ searchableKeyWord | FIELDNAME | ROW | COL + | COLUMN_NAME ; diff --git a/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java b/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java index 07f7e7935a0..d4426590e00 100644 --- a/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java +++ b/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java @@ -15,6 +15,7 @@ import static org.opensearch.sql.ppl.antlr.parser.OpenSearchPPLParser.DescribeCommandContext; import static org.opensearch.sql.ppl.antlr.parser.OpenSearchPPLParser.DynamicSourceClauseContext; import static org.opensearch.sql.ppl.antlr.parser.OpenSearchPPLParser.EvalCommandContext; +import static org.opensearch.sql.ppl.antlr.parser.OpenSearchPPLParser.FieldformatCommandContext; import static org.opensearch.sql.ppl.antlr.parser.OpenSearchPPLParser.FieldsCommandContext; import static org.opensearch.sql.ppl.antlr.parser.OpenSearchPPLParser.HeadCommandContext; import static org.opensearch.sql.ppl.antlr.parser.OpenSearchPPLParser.RenameCommandContext; @@ -91,6 +92,7 @@ import org.opensearch.sql.ast.tree.ML; import org.opensearch.sql.ast.tree.MinSpanBin; import org.opensearch.sql.ast.tree.Multisearch; +import org.opensearch.sql.ast.tree.MvCombine; import org.opensearch.sql.ast.tree.Parse; import org.opensearch.sql.ast.tree.Patterns; import org.opensearch.sql.ast.tree.Project; @@ -111,6 +113,7 @@ import org.opensearch.sql.ast.tree.StreamWindow; import org.opensearch.sql.ast.tree.SubqueryAlias; import org.opensearch.sql.ast.tree.TableFunction; +import org.opensearch.sql.ast.tree.Transpose; import org.opensearch.sql.ast.tree.Trendline; import org.opensearch.sql.ast.tree.UnresolvedPlan; import org.opensearch.sql.ast.tree.Window; @@ -742,6 +745,13 @@ public UnresolvedPlan visitReverseCommand(OpenSearchPPLParser.ReverseCommandCont return new Reverse(); } + /** Transpose command. */ + @Override + public UnresolvedPlan visitTransposeCommand(OpenSearchPPLParser.TransposeCommandContext ctx) { + java.util.Map<String, Argument> arguments = ArgumentFactory.getArgumentList(ctx); + return new Transpose(arguments); + } + /** Chart command. */ @Override public UnresolvedPlan visitChartCommand(OpenSearchPPLParser.ChartCommandContext ctx) { @@ -813,6 +823,18 @@ public UnresolvedPlan visitEvalCommand(EvalCommandContext ctx) { .collect(Collectors.toList())); } + @Override + public UnresolvedPlan visitFieldformatCommand(FieldformatCommandContext ctx) { + // Use the new fieldFormatEvalClause instead of evalClause + org.opensearch.sql.ast.tree.Eval eval = + new org.opensearch.sql.ast.tree.Eval( + ctx.fieldFormatEvalClause().stream() + .map(ct -> (Let) internalVisitExpression(ct)) + .collect(Collectors.toList())); + + return eval; + } + private List<UnresolvedExpression> getGroupByList(ByClauseContext ctx) { return ctx.fieldList().fieldExpression().stream() .map(this::internalVisitExpression) @@ -874,6 +896,18 @@ public UnresolvedPlan visitExpandCommand(OpenSearchPPLParser.ExpandCommandContex return new Expand(fieldExpression, alias); } + /** mvcombine command. 
*/ + @Override + public UnresolvedPlan visitMvcombineCommand(OpenSearchPPLParser.MvcombineCommandContext ctx) { + Field field = (Field) internalVisitExpression(ctx.fieldExpression()); + + String delim = null; + if (ctx.DELIM() != null) { + delim = StringUtils.unquoteText(getTextInQuery(ctx.stringLiteral())); + } + return new MvCombine(field, delim); + } + @Override public UnresolvedPlan visitGrokCommand(OpenSearchPPLParser.GrokCommandContext ctx) { UnresolvedExpression sourceField = internalVisitExpression(ctx.source_field); diff --git a/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstExpressionBuilder.java b/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstExpressionBuilder.java index 283297c4ead..471c0c2f1c9 100644 --- a/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstExpressionBuilder.java +++ b/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstExpressionBuilder.java @@ -105,6 +105,54 @@ public UnresolvedExpression visitEvalClause(EvalClauseContext ctx) { return new Let((Field) visit(ctx.fieldExpression()), visit(ctx.logicalExpression())); } + /** Field format eval clause - similar to evalClause but for the fieldformat command. */ + @Override + public UnresolvedExpression visitFieldFormatEvalClause( + OpenSearchPPLParser.FieldFormatEvalClauseContext ctx) { + OpenSearchPPLParser.FfLogicalExpressionContext ffLogicalExpressionCtx = + ctx.ffLogicalExpression(); + OpenSearchPPLParser.LogicalExpressionContext logicalExpression = null; + Literal prefix = null; + Literal suffix = null; + switch (ffLogicalExpressionCtx) { + case OpenSearchPPLParser.FfStandardLogicalExpressionContext + ffStandardLogicalExpressionContext -> { + // Standard logical expression + logicalExpression = ffStandardLogicalExpressionContext.logicalExpression(); + return new Let((Field) visit(ctx.fieldExpression()), visit(logicalExpression)); + } + case OpenSearchPPLParser.StringDotlogicalExpressionContext + stringDotlogicalExpressionContext -> { + // String dot logical expression + logicalExpression = stringDotlogicalExpressionContext.logicalExpression(); + prefix = (Literal) visit(stringDotlogicalExpressionContext.stringLiteral()); + return new Let( + (Field) visit(ctx.fieldExpression()), visit(logicalExpression), prefix, suffix); + } + case OpenSearchPPLParser.LogicalExpressionDotStringContext + logicalExpressionDotStringContext -> { + // Logical expression dot string + logicalExpression = logicalExpressionDotStringContext.logicalExpression(); + suffix = (Literal) visit(logicalExpressionDotStringContext.stringLiteral()); + return new Let( + (Field) visit(ctx.fieldExpression()), visit(logicalExpression), prefix, suffix); + } + case OpenSearchPPLParser.StringDotlogicalExpressionDotStringContext + stringDotlogicalExpressionDotStringContext -> { + // String dot logical expression dot string + logicalExpression = stringDotlogicalExpressionDotStringContext.logicalExpression(); + prefix = (Literal) visit(stringDotlogicalExpressionDotStringContext.stringLiteral(0)); + + suffix = (Literal) visit(stringDotlogicalExpressionDotStringContext.stringLiteral(1)); + return new Let( + (Field) visit(ctx.fieldExpression()), visit(logicalExpression), prefix, suffix); + } + case null, default -> + throw new IllegalArgumentException( + "Unknown ffLogicalExpression context: " + ffLogicalExpressionCtx); + } + } + /** Trendline clause. 
*/ @Override public Trendline.TrendlineComputation visitTrendlineClause( diff --git a/ppl/src/main/java/org/opensearch/sql/ppl/utils/ArgumentFactory.java b/ppl/src/main/java/org/opensearch/sql/ppl/utils/ArgumentFactory.java index ed76b29b77a..72090e2f069 100644 --- a/ppl/src/main/java/org/opensearch/sql/ppl/utils/ArgumentFactory.java +++ b/ppl/src/main/java/org/opensearch/sql/ppl/utils/ArgumentFactory.java @@ -8,7 +8,9 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; +import java.util.HashMap; import java.util.List; +import java.util.Map; import java.util.Optional; import org.antlr.v4.runtime.ParserRuleContext; import org.opensearch.sql.ast.dsl.AstDSL; @@ -308,6 +310,30 @@ public static List<Argument> getArgumentList( return arguments; } + public static Map<String, Argument> getArgumentList( + OpenSearchPPLParser.TransposeCommandContext transposeCommandContext) { + Map<String, Argument> arguments = new HashMap<>(); + for (OpenSearchPPLParser.TransposeParameterContext ctx : + transposeCommandContext.transposeParameter()) { + + if (ctx.COLUMN_NAME() != null) { + if (ctx.stringLiteral() == null) { + throw new IllegalArgumentException("COLUMN_NAME requires a string literal value"); + } + Literal columnName = getArgumentValue(ctx.stringLiteral()); + arguments.put("columnName", new Argument("columnName", columnName)); + } else if (ctx.number != null) { + + arguments.put("number", new Argument("number", getArgumentValue(ctx.number))); + } else { + throw new IllegalArgumentException( + String.format( + "A transpose parameter must be an integer limit or column_name, got %s", ctx.getText())); + } + } + return arguments; + } + /** * Get list of {@link Argument}. * diff --git a/ppl/src/main/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizer.java b/ppl/src/main/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizer.java index a1e31e896dc..4376b5659d4 100644 --- a/ppl/src/main/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizer.java +++ b/ppl/src/main/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizer.java @@ -82,6 +82,7 @@ import org.opensearch.sql.ast.tree.Lookup; import org.opensearch.sql.ast.tree.MinSpanBin; import org.opensearch.sql.ast.tree.Multisearch; +import org.opensearch.sql.ast.tree.MvCombine; import org.opensearch.sql.ast.tree.Parse; import org.opensearch.sql.ast.tree.Patterns; import org.opensearch.sql.ast.tree.Project; @@ -100,6 +101,7 @@ import org.opensearch.sql.ast.tree.StreamWindow; import org.opensearch.sql.ast.tree.SubqueryAlias; import org.opensearch.sql.ast.tree.TableFunction; +import org.opensearch.sql.ast.tree.Transpose; import org.opensearch.sql.ast.tree.Trendline; import org.opensearch.sql.ast.tree.UnresolvedPlan; import org.opensearch.sql.ast.tree.Values; @@ -463,6 +465,14 @@ public String visitExpand(Expand node, String context) { return StringUtils.format("%s | expand %s", child, field); } + @Override + public String visitMvCombine(MvCombine node, String context) { + String child = node.getChild().getFirst().accept(this, context); + String field = visitExpression(node.getField()); + + return StringUtils.format("%s | mvcombine delim=%s %s", child, MASK_LITERAL, field); + } + /** Build {@link LogicalSort}. 
*/ @Override public String visitSort(Sort node, String context) { @@ -636,6 +646,27 @@ public String visitTrendline(Trendline node, String context) { return StringUtils.format("%s | trendline %s", child, computations); } + @Override + public String visitTranspose(Transpose node, String context) { + if (node.getChild().isEmpty()) { + return "source=*** | transpose"; + } + String child = node.getChild().get(0).accept(this, context); + StringBuilder anonymized = new StringBuilder(StringUtils.format("%s | transpose", child)); + java.util.Map<String, Argument> arguments = node.getArguments(); + + if (arguments.containsKey("number")) { + Argument numberArg = arguments.get("number"); + if (numberArg != null) { + anonymized.append(StringUtils.format(" %s", numberArg.getValue())); + } + } + if (arguments.containsKey("columnName")) { + anonymized.append(StringUtils.format(" %s=***", "column_name")); + } + return anonymized.toString(); + } + @Override public String visitAppendCol(AppendCol node, String context) { String child = node.getChild().get(0).accept(this, context); diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLFieldFormatTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLFieldFormatTest.java new file mode 100644 index 00000000000..e20bd1b0e47 --- /dev/null +++ b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLFieldFormatTest.java @@ -0,0 +1,366 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.ppl.calcite; + +import static org.hamcrest.CoreMatchers.is; +import static org.hamcrest.MatcherAssert.assertThat; +import static org.junit.Assert.assertThrows; + +import org.apache.calcite.rel.RelNode; +import org.apache.calcite.test.CalciteAssert; +import org.junit.Test; + +public class CalcitePPLFieldFormatTest extends CalcitePPLAbstractTest { + + public CalcitePPLFieldFormatTest() { + super(CalciteAssert.SchemaSpec.SCOTT_WITH_TEMPORAL); + } + + @Test + public void testFieldFormat1() { + String ppl = "source=EMP | sort EMPNO| head 3 | fieldformat a = 1"; + RelNode root = getRelNode(ppl); + String expectedLogical = + "LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4], SAL=[$5]," + " COMM=[$6], DEPTNO=[$7], a=[1])\n" + " LogicalSort(sort0=[$0], dir0=[ASC-nulls-first], fetch=[3])\n" + " LogicalTableScan(table=[[scott, EMP]])\n"; + + verifyLogical(root, expectedLogical); + String expectedResult = + "EMPNO=7369; ENAME=SMITH; JOB=CLERK; MGR=7902; HIREDATE=1980-12-17; SAL=800.00; COMM=null;" + " DEPTNO=20; a=1\n" + "EMPNO=7499; ENAME=ALLEN; JOB=SALESMAN; MGR=7698; HIREDATE=1981-02-20; SAL=1600.00;" + " COMM=300.00; DEPTNO=30; a=1\n" + "EMPNO=7521; ENAME=WARD; JOB=SALESMAN; MGR=7698; HIREDATE=1981-02-22; SAL=1250.00;" + " COMM=500.00; DEPTNO=30; a=1\n"; + + verifyResult(root, expectedResult); + String expectedSparkSql = + "SELECT `EMPNO`, `ENAME`, `JOB`, `MGR`, `HIREDATE`, `SAL`, `COMM`, `DEPTNO`, 1 `a`\n" + "FROM `scott`.`EMP`\n" + "ORDER BY `EMPNO`\n" + "LIMIT 3"; + + verifyPPLToSparkSQL(root, expectedSparkSql); + } + + @Test + public void testFieldFormat2() { + String ppl = + "source=EMP | sort EMPNO| head 3 |fieldformat formatted_salary =" + " \"$\".tostring(SAL,\"commas\")"; + RelNode root = getRelNode(ppl); + String expectedLogical = + "LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4], SAL=[$5]," + " COMM=[$6], DEPTNO=[$7], formatted_salary=[||('$':VARCHAR, TOSTRING($5," + " 'commas':VARCHAR))])\n" + " 
LogicalSort(sort0=[$0], dir0=[ASC-nulls-first], fetch=[3])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; + verifyLogical(root, expectedLogical); + String expectedResult = + "EMPNO=7369; ENAME=SMITH; JOB=CLERK; MGR=7902; HIREDATE=1980-12-17; SAL=800.00; COMM=null;" + + " DEPTNO=20; formatted_salary=$800\n" + + "EMPNO=7499; ENAME=ALLEN; JOB=SALESMAN; MGR=7698; HIREDATE=1981-02-20; SAL=1600.00;" + + " COMM=300.00; DEPTNO=30; formatted_salary=$1,600\n" + + "EMPNO=7521; ENAME=WARD; JOB=SALESMAN; MGR=7698; HIREDATE=1981-02-22; SAL=1250.00;" + + " COMM=500.00; DEPTNO=30; formatted_salary=$1,250\n"; + + verifyResult(root, expectedResult); + String expectedSparkSql = + "SELECT `EMPNO`, `ENAME`, `JOB`, `MGR`, `HIREDATE`, `SAL`, `COMM`, `DEPTNO`, '$' ||" + + " TOSTRING(`SAL`, 'commas') `formatted_salary`\n" + + "FROM `scott`.`EMP`\n" + + "ORDER BY `EMPNO`\n" + + "LIMIT 3"; + + verifyPPLToSparkSQL(root, expectedSparkSql); + } + + @Test + public void testFieldFormatAndFields() { + String ppl = + "source=EMP | sort EMPNO| head 5|fieldformat formatted_salary =" + + " \"$\".tostring(SAL,\"commas\") |fields EMPNO, JOB, formatted_salary"; + RelNode root = getRelNode(ppl); + String expectedLogical = + "LogicalProject(EMPNO=[$0], JOB=[$2], formatted_salary=[||('$':VARCHAR, TOSTRING($5," + + " 'commas':VARCHAR))])\n" + + " LogicalSort(sort0=[$0], dir0=[ASC-nulls-first], fetch=[5])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; + + verifyLogical(root, expectedLogical); + String expectedResult = + "EMPNO=7369; JOB=CLERK; formatted_salary=$800\n" + + "EMPNO=7499; JOB=SALESMAN; formatted_salary=$1,600\n" + + "EMPNO=7521; JOB=SALESMAN; formatted_salary=$1,250\n" + + "EMPNO=7566; JOB=MANAGER; formatted_salary=$2,975\n" + + "EMPNO=7654; JOB=SALESMAN; formatted_salary=$1,250\n"; + + verifyResult(root, expectedResult); + String expectedSparkSql = + "SELECT `EMPNO`, `JOB`, '$' || TOSTRING(`SAL`, 'commas') `formatted_salary`\n" + + "FROM `scott`.`EMP`\n" + + "ORDER BY `EMPNO`\n" + + "LIMIT 5"; + + verifyPPLToSparkSQL(root, expectedSparkSql); + } + + @Test + public void testFieldFormat2Fields() { + String ppl = "source=EMP | sort EMPNO| head 3 | fieldformat a = 1, b = 2"; + RelNode root = getRelNode(ppl); + String expectedLogical = + "LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4], SAL=[$5]," + + " COMM=[$6], DEPTNO=[$7], a=[1], b=[2])\n" + + " LogicalSort(sort0=[$0], dir0=[ASC-nulls-first], fetch=[3])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; + + verifyLogical(root, expectedLogical); + String expectedResult = + "EMPNO=7369; ENAME=SMITH; JOB=CLERK; MGR=7902; HIREDATE=1980-12-17; SAL=800.00; COMM=null;" + + " DEPTNO=20; a=1; b=2\n" + + "EMPNO=7499; ENAME=ALLEN; JOB=SALESMAN; MGR=7698; HIREDATE=1981-02-20; SAL=1600.00;" + + " COMM=300.00; DEPTNO=30; a=1; b=2\n" + + "EMPNO=7521; ENAME=WARD; JOB=SALESMAN; MGR=7698; HIREDATE=1981-02-22; SAL=1250.00;" + + " COMM=500.00; DEPTNO=30; a=1; b=2\n"; + + verifyResult(root, expectedResult); + String expectedSparkSql = + "SELECT `EMPNO`, `ENAME`, `JOB`, `MGR`, `HIREDATE`, `SAL`, `COMM`, `DEPTNO`, 1 `a`, 2 `b`\n" + + "FROM `scott`.`EMP`\n" + + "ORDER BY `EMPNO`\n" + + "LIMIT 3"; + + verifyPPLToSparkSQL(root, expectedSparkSql); + } + + @Test + public void testFieldFormat3() { + String ppl = + "source=EMP | sort EMPNO| head 3 | fieldformat a = 1 | fieldformat b = 2 | fieldformat c =" + + " 3"; + RelNode root = getRelNode(ppl); + String expectedLogical = + "LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4], SAL=[$5]," + + 
" COMM=[$6], DEPTNO=[$7], a=[1], b=[2], c=[3])\n" + + " LogicalSort(sort0=[$0], dir0=[ASC-nulls-first], fetch=[3])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; + + verifyLogical(root, expectedLogical); + String expectedResult = + "EMPNO=7369; ENAME=SMITH; JOB=CLERK; MGR=7902; HIREDATE=1980-12-17; SAL=800.00; COMM=null;" + + " DEPTNO=20; a=1; b=2; c=3\n" + + "EMPNO=7499; ENAME=ALLEN; JOB=SALESMAN; MGR=7698; HIREDATE=1981-02-20; SAL=1600.00;" + + " COMM=300.00; DEPTNO=30; a=1; b=2; c=3\n" + + "EMPNO=7521; ENAME=WARD; JOB=SALESMAN; MGR=7698; HIREDATE=1981-02-22; SAL=1250.00;" + + " COMM=500.00; DEPTNO=30; a=1; b=2; c=3\n"; + verifyResult(root, expectedResult); + String expectedSparkSql = + "SELECT `EMPNO`, `ENAME`, `JOB`, `MGR`, `HIREDATE`, `SAL`, `COMM`, `DEPTNO`, 1 `a`, 2 `b`," + + " 3 `c`\n" + + "FROM `scott`.`EMP`\n" + + "ORDER BY `EMPNO`\n" + + "LIMIT 3"; + + verifyPPLToSparkSQL(root, expectedSparkSql); + } + + @Test + public void testFieldFormatSum() { + String ppl = + "source=EMP |sort EMPNO | head 3| fieldformat total = sum(1, 2, 3) | fields EMPNO, total"; + RelNode root = getRelNode(ppl); + String expectedLogical = + "LogicalProject(EMPNO=[$0], total=[+(1, +(2, 3))])\n" + + " LogicalSort(sort0=[$0], dir0=[ASC-nulls-first], fetch=[3])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; + + verifyLogical(root, expectedLogical); + String expectedResult = "EMPNO=7369; total=6\nEMPNO=7499; total=6\nEMPNO=7521; total=6\n"; + verifyResult(root, expectedResult); + + String expectedSparkSql = + "SELECT `EMPNO`, 1 + (2 + 3) `total`\nFROM `scott`.`EMP`\nORDER BY `EMPNO`\nLIMIT 3"; + verifyPPLToSparkSQL(root, expectedSparkSql); + } + + @Test + public void testFieldFormatWithToNumber() { + String ppl = + "source=EMP | sort EMPNO | head 3| fieldformat total = sum(SAL, COMM, 100) | fieldformat" + + " total = \"$\".cast(total as string) | fields EMPNO, total"; + RelNode root = getRelNode(ppl); + String expectedLogical = + "LogicalProject(EMPNO=[$0], total=[||('$':VARCHAR, NUMBER_TO_STRING(+($5, +($6, 100))))])\n" + + " LogicalSort(sort0=[$0], dir0=[ASC-nulls-first], fetch=[3])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; + + verifyLogical(root, expectedLogical); + String expectedResult = + "EMPNO=7369; total=null\nEMPNO=7499; total=$2000.00\nEMPNO=7521; total=$1850.00\n"; + verifyResult(root, expectedResult); + String expectedSparkSql = + "SELECT `EMPNO`, '$' || NUMBER_TO_STRING(`SAL` + (`COMM` + 100)) `total`\n" + + "FROM `scott`.`EMP`\n" + + "ORDER BY `EMPNO`\n" + + "LIMIT 3"; + verifyPPLToSparkSQL(root, expectedSparkSql); + } + + @Test + public void testComplexFieldFormatCommands4() { + String ppl = + "source=EMP | fieldformat col1 = SAL | sort - col1 | head 3 | fields ENAME, col1 |" + + " fieldformat col2 = col1 | sort + col2 | fields ENAME, col2 | fieldformat col3 =" + + " col2 | head 2 | fields HIREDATE, col3"; + IllegalArgumentException e = + assertThrows( + IllegalArgumentException.class, + () -> { + RelNode root = getRelNode(ppl); + }); + assertThat(e.getMessage(), is("Field [HIREDATE] not found.")); + } + + @Test + public void testFieldFormatMaxOnStrings() { + String ppl = + "source=EMP | sort EMPNO | head 3 |fieldformat a = \"Max String:\".max('banana', 'Door'," + + " ENAME)"; + RelNode root = getRelNode(ppl); + String expectedLogical = + "LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4], SAL=[$5]," + + " COMM=[$6], DEPTNO=[$7], a=[||('Max String:':VARCHAR, SCALAR_MAX('banana':VARCHAR," + + " 'Door':VARCHAR, $1))])\n" + + " LogicalSort(sort0=[$0], 
dir0=[ASC-nulls-first], fetch=[3])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; + + verifyLogical(root, expectedLogical); + String expectedResult = + "EMPNO=7369; ENAME=SMITH; JOB=CLERK; MGR=7902; HIREDATE=1980-12-17; SAL=800.00; COMM=null;" + + " DEPTNO=20; a=Max String:banana\n" + + "EMPNO=7499; ENAME=ALLEN; JOB=SALESMAN; MGR=7698; HIREDATE=1981-02-20; SAL=1600.00;" + + " COMM=300.00; DEPTNO=30; a=Max String:banana\n" + + "EMPNO=7521; ENAME=WARD; JOB=SALESMAN; MGR=7698; HIREDATE=1981-02-22; SAL=1250.00;" + + " COMM=500.00; DEPTNO=30; a=Max String:banana\n"; + + verifyResult(root, expectedResult); + String expectedSparkSql = + "SELECT `EMPNO`, `ENAME`, `JOB`, `MGR`, `HIREDATE`, `SAL`, `COMM`, `DEPTNO`, 'Max String:'" + + " || SCALAR_MAX('banana', 'Door', `ENAME`) `a`\n" + + "FROM `scott`.`EMP`\n" + + "ORDER BY `EMPNO`\n" + + "LIMIT 3"; + + verifyPPLToSparkSQL(root, expectedSparkSql); + } + + @Test + public void testFieldFormatMaxOnStringsWithSuffix() { + String ppl = + "source=EMP | sort EMPNO | head 3 |fields EMPNO, ENAME| fieldformat a = max('banana'," + + " 'Door', ENAME).\" after comparing with provided constant strings and ENAME column" + + " values.\""; + + RelNode root = getRelNode(ppl); + String expectedLogical = + "LogicalProject(EMPNO=[$0], ENAME=[$1], a=[||(SCALAR_MAX('banana':VARCHAR, 'Door':VARCHAR," + + " $1), ' after comparing with provided constant strings and ENAME column" + + " values.':VARCHAR)])\n" + + " LogicalSort(sort0=[$0], dir0=[ASC-nulls-first], fetch=[3])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; + + verifyLogical(root, expectedLogical); + String expectedResult = + "EMPNO=7369; ENAME=SMITH; a=banana after comparing with provided constant strings and ENAME" + + " column values.\n" + + "EMPNO=7499; ENAME=ALLEN; a=banana after comparing with provided constant strings and" + + " ENAME column values.\n" + + "EMPNO=7521; ENAME=WARD; a=banana after comparing with provided constant strings and" + + " ENAME column values.\n"; + + verifyResult(root, expectedResult); + String expectedSparkSql = + "SELECT `EMPNO`, `ENAME`, SCALAR_MAX('banana', 'Door', `ENAME`) || ' after comparing with" + + " provided constant strings and ENAME column values.' 
`a`\n" + + "FROM `scott`.`EMP`\n" + + "ORDER BY `EMPNO`\n" + + "LIMIT 3"; + + verifyPPLToSparkSQL(root, expectedSparkSql); + } + + @Test + public void testFieldFormatMaxOnStringsWithPrefixSuffix() { + String ppl = + "source=EMP | sort EMPNO | head 3 |fields EMPNO, ENAME| fieldformat a = \"Max String:" + + " \\\"\".max('banana', 'Door', ENAME).\"\\\" after comparing with provided constant" + + " strings and ENAME column values.\""; + // # + RelNode root = getRelNode(ppl); + String expectedLogical = + "LogicalProject(EMPNO=[$0], ENAME=[$1], a=[||(||('Max String: \"':VARCHAR," + + " SCALAR_MAX('banana':VARCHAR, 'Door':VARCHAR, $1)), '\" after comparing with" + + " provided constant strings and ENAME column values.':VARCHAR)])\n" + + " LogicalSort(sort0=[$0], dir0=[ASC-nulls-first], fetch=[3])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; + verifyLogical(root, expectedLogical); + String expectedResult = + "EMPNO=7369; ENAME=SMITH; a=Max String: \"banana\" after comparing with provided constant" + + " strings and ENAME column values.\n" + + "EMPNO=7499; ENAME=ALLEN; a=Max String: \"banana\" after comparing with provided" + + " constant strings and ENAME column values.\n" + + "EMPNO=7521; ENAME=WARD; a=Max String: \"banana\" after comparing with provided" + + " constant strings and ENAME column values.\n"; + + verifyResult(root, expectedResult); + String expectedSparkSql = + "SELECT `EMPNO`, `ENAME`, 'Max String: \"' || SCALAR_MAX('banana', 'Door', `ENAME`) || '\"" + + " after comparing with provided constant strings and ENAME column values.' `a`\n" + + "FROM `scott`.`EMP`\n" + + "ORDER BY `EMPNO`\n" + + "LIMIT 3"; + + verifyPPLToSparkSQL(root, expectedSparkSql); + } + + @Test + public void testEvalMinOnNumericAndString() { + String ppl = + "source=EMP | sort EMPNO | head 3| fields EMPNO, ENAME, DEPTNO | fieldformat a = \"Minimum" + + " of DEPTNO, ENAME and Provided list of 5, 30, 'banana', 'Door': \".min(5, 30," + + " DEPTNO, 'banana', 'Door', ENAME)"; + RelNode root = getRelNode(ppl); + String expectedLogical = + "LogicalProject(EMPNO=[$0], ENAME=[$1], DEPTNO=[$7], a=[||('Minimum of DEPTNO, ENAME and" + + " Provided list of 5, 30, ''banana'', ''Door'': ':VARCHAR, SCALAR_MIN(5, 30, $7," + + " 'banana':VARCHAR, 'Door':VARCHAR, $1))])\n" + + " LogicalSort(sort0=[$0], dir0=[ASC-nulls-first], fetch=[3])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; + + verifyLogical(root, expectedLogical); + String expectedResult = + "EMPNO=7369; ENAME=SMITH; DEPTNO=20; a=Minimum of DEPTNO, ENAME and Provided list of 5," + + " 30, 'banana', 'Door': 5\n" + + "EMPNO=7499; ENAME=ALLEN; DEPTNO=30; a=Minimum of DEPTNO, ENAME and Provided list of " + + " 5, 30, 'banana', 'Door': 5\n" + + "EMPNO=7521; ENAME=WARD; DEPTNO=30; a=Minimum of DEPTNO, ENAME and Provided list of " + + " 5, 30, 'banana', 'Door': 5\n"; + + verifyResult(root, expectedResult); + String expectedSparkSql = + "SELECT `EMPNO`, `ENAME`, `DEPTNO`, 'Minimum of DEPTNO, ENAME and Provided list of 5, 30," + + " ''banana'', ''Door'': ' || SCALAR_MIN(5, 30, `DEPTNO`, 'banana', 'Door', `ENAME`)" + + " `a`\n" + + "FROM `scott`.`EMP`\n" + + "ORDER BY `EMPNO`\n" + + "LIMIT 3"; + + verifyPPLToSparkSQL(root, expectedSparkSql); + } +} diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLMvCombineTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLMvCombineTest.java new file mode 100644 index 00000000000..6e6460a2365 --- /dev/null +++ b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLMvCombineTest.java @@ -0,0 
+1,273 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.ppl.calcite; + +import static org.junit.Assert.assertThrows; + +import com.google.common.collect.ImmutableList; +import java.util.List; +import lombok.RequiredArgsConstructor; +import org.apache.calcite.DataContext; +import org.apache.calcite.config.CalciteConnectionConfig; +import org.apache.calcite.linq4j.Enumerable; +import org.apache.calcite.linq4j.Linq4j; +import org.apache.calcite.plan.RelTraitDef; +import org.apache.calcite.rel.RelCollations; +import org.apache.calcite.rel.RelNode; +import org.apache.calcite.rel.type.RelDataType; +import org.apache.calcite.rel.type.RelDataTypeFactory; +import org.apache.calcite.rel.type.RelProtoDataType; +import org.apache.calcite.schema.ScannableTable; +import org.apache.calcite.schema.Schema; +import org.apache.calcite.schema.SchemaPlus; +import org.apache.calcite.schema.Statistic; +import org.apache.calcite.schema.Statistics; +import org.apache.calcite.sql.SqlCall; +import org.apache.calcite.sql.SqlNode; +import org.apache.calcite.sql.parser.SqlParser; +import org.apache.calcite.sql.type.SqlTypeName; +import org.apache.calcite.test.CalciteAssert; +import org.apache.calcite.tools.Frameworks; +import org.apache.calcite.tools.Programs; +import org.checkerframework.checker.nullness.qual.Nullable; +import org.junit.Test; + +public class CalcitePPLMvCombineTest extends CalcitePPLAbstractTest { + + public CalcitePPLMvCombineTest() { + super(CalciteAssert.SchemaSpec.SCOTT_WITH_TEMPORAL); + } + + @Override + protected Frameworks.ConfigBuilder config(CalciteAssert.SchemaSpec... schemaSpecs) { + final SchemaPlus rootSchema = Frameworks.createRootSchema(true); + final SchemaPlus schema = CalciteAssert.addSchema(rootSchema, schemaSpecs); + + ImmutableList<Object[]> rows = + ImmutableList.of( + // existing "basic" + new Object[] {"basic", "A", 10}, + new Object[] {"basic", "A", 20}, + new Object[] {"basic", "B", 60}, + new Object[] {"basic", "A", 30}, + + // NULL target values case (Splunk-style: nulls do NOT contribute to mv) + new Object[] {"nulls", "A", null}, + new Object[] {"nulls", "A", 10}, + new Object[] {"nulls", "B", null}, + + // single-row case + new Object[] {"single", "Z", 5}); + + schema.add("MVCOMBINE_DATA", new MvCombineDataTable(rows)); + + return Frameworks.newConfigBuilder() + .parserConfig(SqlParser.Config.DEFAULT) + .defaultSchema(schema) + .traitDefs((List<RelTraitDef>) null) + .programs(Programs.heuristicJoinOrder(Programs.RULE_SET, true, 2)); + } + + @Test + public void testMvCombineBasic() { + String ppl = + "source=MVCOMBINE_DATA " + "| where case = \"basic\" " + "| fields case, ip, packets " + "| mvcombine packets " + "| sort ip"; + + RelNode root = getRelNode(ppl); + + String expectedLogical = + "LogicalSort(sort0=[$1], dir0=[ASC-nulls-first])\n" + " LogicalProject(case=[$0], ip=[$1], packets=[$2])\n" + " LogicalAggregate(group=[{0, 1}], packets=[ARRAY_AGG($2) FILTER $3])\n" + " LogicalProject(case=[$0], ip=[$1], packets=[$2], $f3=[IS NOT NULL($2)])\n" + " LogicalFilter(condition=[=($0, 'basic')])\n" + " LogicalTableScan(table=[[scott, MVCOMBINE_DATA]])\n"; + verifyLogical(root, expectedLogical); + + String expectedSparkSql = + "SELECT `case`, `ip`, ARRAY_AGG(`packets`) FILTER (WHERE `packets` IS NOT NULL) `packets`\n" + "FROM `scott`.`MVCOMBINE_DATA`\n" + "WHERE `case` = 'basic'\n" + "GROUP BY `case`, `ip`\n" + "ORDER BY `ip`"; + verifyPPLToSparkSQL(root, expectedSparkSql); + } + + @Test + public void 
testMvCombineWithNullTargetValues() { + String ppl = + "source=MVCOMBINE_DATA " + + "| where case = \"nulls\" " + + "| fields case, ip, packets " + + "| mvcombine packets " + + "| sort ip"; + + RelNode root = getRelNode(ppl); + + String expectedLogical = + "LogicalSort(sort0=[$1], dir0=[ASC-nulls-first])\n" + + " LogicalProject(case=[$0], ip=[$1], packets=[$2])\n" + + " LogicalAggregate(group=[{0, 1}], packets=[ARRAY_AGG($2) FILTER $3])\n" + + " LogicalProject(case=[$0], ip=[$1], packets=[$2], $f3=[IS NOT NULL($2)])\n" + + " LogicalFilter(condition=[=($0, 'nulls')])\n" + + " LogicalTableScan(table=[[scott, MVCOMBINE_DATA]])\n"; + verifyLogical(root, expectedLogical); + + String expectedSparkSql = + "SELECT `case`, `ip`, ARRAY_AGG(`packets`) FILTER (WHERE `packets` IS NOT NULL) `packets`\n" + + "FROM `scott`.`MVCOMBINE_DATA`\n" + + "WHERE `case` = 'nulls'\n" + + "GROUP BY `case`, `ip`\n" + + "ORDER BY `ip`"; + verifyPPLToSparkSQL(root, expectedSparkSql); + } + + @Test + public void testMvCombineWithDelimOption_SplunkSyntaxOrder() { + String ppl = + "source=MVCOMBINE_DATA " + + "| where case = \"basic\" " + + "| fields case, ip, packets " + + "| mvcombine packets delim='|' " + + "| sort ip"; + + RelNode root = getRelNode(ppl); + + String expectedLogical = + "LogicalSort(sort0=[$1], dir0=[ASC-nulls-first])\n" + + " LogicalProject(case=[$0], ip=[$1], packets=[$2])\n" + + " LogicalAggregate(group=[{0, 1}], packets=[ARRAY_AGG($2) FILTER $3])\n" + + " LogicalProject(case=[$0], ip=[$1], packets=[$2], $f3=[IS NOT NULL($2)])\n" + + " LogicalFilter(condition=[=($0, 'basic')])\n" + + " LogicalTableScan(table=[[scott, MVCOMBINE_DATA]])\n"; + verifyLogical(root, expectedLogical); + + String expectedSparkSql = + "SELECT `case`, `ip`, ARRAY_AGG(`packets`) FILTER (WHERE `packets` IS NOT NULL) `packets`\n" + + "FROM `scott`.`MVCOMBINE_DATA`\n" + + "WHERE `case` = 'basic'\n" + + "GROUP BY `case`, `ip`\n" + + "ORDER BY `ip`"; + verifyPPLToSparkSQL(root, expectedSparkSql); + } + + @Test + public void testMvCombineNonExistentField() { + String ppl = + "source=MVCOMBINE_DATA " + + "| where case = \"basic\" " + + "| fields case, ip, packets " + + "| mvcombine does_not_exist"; + + Exception ex = assertThrows(Exception.class, () -> getRelNode(ppl)); + + String msg = String.valueOf(ex.getMessage()); + org.junit.Assert.assertTrue( + "Expected error message to mention missing field. 
Actual: " + msg, + msg.toLowerCase().contains("does_not_exist") || msg.toLowerCase().contains("field")); + } + + @Test + public void testMvCombineSingleRow() { + String ppl = + "source=MVCOMBINE_DATA " + + "| where case = \"single\" " + + "| fields case, ip, packets " + + "| mvcombine packets " + + "| sort ip"; + + RelNode root = getRelNode(ppl); + + String expectedLogical = + "LogicalSort(sort0=[$1], dir0=[ASC-nulls-first])\n" + + " LogicalProject(case=[$0], ip=[$1], packets=[$2])\n" + + " LogicalAggregate(group=[{0, 1}], packets=[ARRAY_AGG($2) FILTER $3])\n" + + " LogicalProject(case=[$0], ip=[$1], packets=[$2], $f3=[IS NOT NULL($2)])\n" + + " LogicalFilter(condition=[=($0, 'single')])\n" + + " LogicalTableScan(table=[[scott, MVCOMBINE_DATA]])\n"; + verifyLogical(root, expectedLogical); + } + + @Test + public void testMvCombineEmptyResult() { + String ppl = + "source=MVCOMBINE_DATA " + + "| where case = \"no_such_case\" " + + "| fields case, ip, packets " + + "| mvcombine packets " + + "| sort ip"; + + RelNode root = getRelNode(ppl); + + String expectedLogical = + "LogicalSort(sort0=[$1], dir0=[ASC-nulls-first])\n" + + " LogicalProject(case=[$0], ip=[$1], packets=[$2])\n" + + " LogicalAggregate(group=[{0, 1}], packets=[ARRAY_AGG($2) FILTER $3])\n" + + " LogicalProject(case=[$0], ip=[$1], packets=[$2], $f3=[IS NOT NULL($2)])\n" + + " LogicalFilter(condition=[=($0, 'no_such_case')])\n" + + " LogicalTableScan(table=[[scott, MVCOMBINE_DATA]])\n"; + verifyLogical(root, expectedLogical); + } + + // ======================================================================== + // Custom ScannableTable for deterministic mvcombine planning tests + // ======================================================================== + + @RequiredArgsConstructor + static class MvCombineDataTable implements ScannableTable { + private final ImmutableList rows; + + protected final RelProtoDataType protoRowType = + factory -> + factory + .builder() + .add("case", SqlTypeName.VARCHAR) + .nullable(true) + .add("ip", SqlTypeName.VARCHAR) + .nullable(true) + .add("packets", SqlTypeName.INTEGER) + .nullable(true) + .build(); + + @Override + public Enumerable<@Nullable Object[]> scan(DataContext root) { + return Linq4j.asEnumerable(rows); + } + + @Override + public RelDataType getRowType(RelDataTypeFactory typeFactory) { + return protoRowType.apply(typeFactory); + } + + @Override + public Statistic getStatistic() { + return Statistics.of(0d, ImmutableList.of(), RelCollations.createSingleton(0)); + } + + @Override + public Schema.TableType getJdbcTableType() { + return Schema.TableType.TABLE; + } + + @Override + public boolean isRolledUp(String column) { + return false; + } + + @Override + public boolean rolledUpColumnValidInsideAgg( + String column, + SqlCall call, + @Nullable SqlNode parent, + @Nullable CalciteConnectionConfig config) { + return false; + } + } +} diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLTransposeTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLTransposeTest.java new file mode 100644 index 00000000000..b6b60c530e7 --- /dev/null +++ b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLTransposeTest.java @@ -0,0 +1,256 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.ppl.calcite; + +import org.apache.calcite.rel.RelNode; +import org.apache.calcite.test.CalciteAssert; +import org.junit.Test; + +public class CalcitePPLTransposeTest extends CalcitePPLAbstractTest { + + public 
CalcitePPLTransposeTest() { + super(CalciteAssert.SchemaSpec.SCOTT_WITH_TEMPORAL); + } + + @Test + public void testSimpleCountWithTranspose() { + String ppl = "source=EMP | stats count() as c|transpose"; + RelNode root = getRelNode(ppl); + String expectedLogical = + "LogicalProject(column=[$0], row 1=[$1], row 2=[$2], row 3=[$3], row 4=[$4], row 5=[$5])\n" + + " LogicalAggregate(group=[{1}], row 1_null=[MAX($0) FILTER $2], row 2_null=[MAX($0)" + + " FILTER $3], row 3_null=[MAX($0) FILTER $4], row 4_null=[MAX($0) FILTER $5], row" + + " 5_null=[MAX($0) FILTER $6])\n" + + " LogicalProject(value=[CAST($3):VARCHAR NOT NULL], $f4=[TRIM(FLAG(BOTH), ' '," + + " $2)], $f5=[=($1, 1)], $f6=[=($1, 2)], $f7=[=($1, 3)], $f8=[=($1, 4)], $f9=[=($1," + + " 5)])\n" + + " LogicalFilter(condition=[IS NOT NULL($3)])\n" + + " LogicalProject(c=[$0], _row_number_transpose_=[$1], column=[$2]," + + " value=[CASE(=($2, 'c'), CAST($0):VARCHAR NOT NULL, null:NULL)])\n" + + " LogicalJoin(condition=[true], joinType=[inner])\n" + + " LogicalProject(c=[$0], _row_number_transpose_=[ROW_NUMBER() OVER ()])\n" + + " LogicalAggregate(group=[{}], c=[COUNT()])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n" + + " LogicalValues(tuples=[[{ 'c' }]])\n"; + verifyLogical(root, expectedLogical); + String expectedResult = "column=c; row 1=14; row 2=null; row 3=null; row 4=null; row 5=null\n"; + verifyResult(root, expectedResult); + + String expectedSparkSql = + "SELECT TRIM(`column`) `column`, MAX(CAST(`value` AS STRING)) FILTER (WHERE" + + " `_row_number_transpose_` = 1) `row 1`, MAX(CAST(`value` AS STRING)) FILTER (WHERE" + + " `_row_number_transpose_` = 2) `row 2`, MAX(CAST(`value` AS STRING)) FILTER (WHERE" + + " `_row_number_transpose_` = 3) `row 3`, MAX(CAST(`value` AS STRING)) FILTER (WHERE" + + " `_row_number_transpose_` = 4) `row 4`, MAX(CAST(`value` AS STRING)) FILTER (WHERE" + + " `_row_number_transpose_` = 5) `row 5`\n" + + "FROM (SELECT `t0`.`c`, `t0`.`_row_number_transpose_`, `t1`.`column`, CASE WHEN" + + " `t1`.`column` = 'c' THEN CAST(`t0`.`c` AS STRING) ELSE NULL END `value`\n" + + "FROM (SELECT COUNT(*) `c`, ROW_NUMBER() OVER () `_row_number_transpose_`\n" + + "FROM `scott`.`EMP`) `t0`\n" + + "CROSS JOIN (VALUES ('c')) `t1` (`column`)) `t2`\n" + + "WHERE `t2`.`value` IS NOT NULL\n" + + "GROUP BY TRIM(`column`)"; + + verifyPPLToSparkSQL(root, expectedSparkSql); + } + + @Test + public void testMultipleAggregatesWithAliasesTranspose() { + String ppl = + "source=EMP | stats avg(SAL) as avg_sal, max(SAL) as max_sal, min(SAL) as min_sal, count()" + + " as cnt|transpose "; + RelNode root = getRelNode(ppl); + String expectedLogical = + "LogicalProject(column=[$0], row 1=[$1], row 2=[$2], row 3=[$3], row 4=[$4], row 5=[$5])\n" + + " LogicalAggregate(group=[{1}], row 1_null=[MAX($0) FILTER $2], row 2_null=[MAX($0)" + + " FILTER $3], row 3_null=[MAX($0) FILTER $4], row 4_null=[MAX($0) FILTER $5], row" + + " 5_null=[MAX($0) FILTER $6])\n" + + " LogicalProject(value=[CAST($6):VARCHAR NOT NULL], $f7=[TRIM(FLAG(BOTH), ' '," + + " $5)], $f8=[=($4, 1)], $f9=[=($4, 2)], $f10=[=($4, 3)], $f11=[=($4, 4)], $f12=[=($4," + + " 5)])\n" + + " LogicalFilter(condition=[IS NOT NULL($6)])\n" + + " LogicalProject(avg_sal=[$0], max_sal=[$1], min_sal=[$2], cnt=[$3]," + + " _row_number_transpose_=[$4], column=[$5], value=[CASE(=($5, 'avg_sal')," + + " NUMBER_TO_STRING($0), =($5, 'max_sal'), NUMBER_TO_STRING($1), =($5, 'min_sal')," + + " NUMBER_TO_STRING($2), =($5, 'cnt'), CAST($3):VARCHAR NOT NULL, null:NULL)])\n" + + " 
LogicalJoin(condition=[true], joinType=[inner])\n" + + " LogicalProject(avg_sal=[$0], max_sal=[$1], min_sal=[$2], cnt=[$3]," + + " _row_number_transpose_=[ROW_NUMBER() OVER ()])\n" + + " LogicalAggregate(group=[{}], avg_sal=[AVG($0)], max_sal=[MAX($0)]," + + " min_sal=[MIN($0)], cnt=[COUNT()])\n" + + " LogicalProject(SAL=[$5])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n" + + " LogicalValues(tuples=[[{ 'avg_sal' }, { 'max_sal' }, { 'min_sal' }, {" + + " 'cnt' }]])\n"; + verifyLogical(root, expectedLogical); + String expectedResult = + "column=avg_sal; row 1=2073.214285; row 2=null; row 3=null; row 4=null; row 5=null\n" + + "column=max_sal; row 1=5000.00; row 2=null; row 3=null; row 4=null; row 5=null\n" + + "column=cnt; row 1=14; row 2=null; row 3=null; row 4=null; row 5=null\n" + + "column=min_sal; row 1=800.00; row 2=null; row 3=null; row 4=null; row 5=null\n"; + + verifyResult(root, expectedResult); + + String expectedSparkSql = + "SELECT TRIM(`column`) `column`, MAX(CAST(`value` AS STRING)) FILTER (WHERE" + + " `_row_number_transpose_` = 1) `row 1`, MAX(CAST(`value` AS STRING)) FILTER (WHERE" + + " `_row_number_transpose_` = 2) `row 2`, MAX(CAST(`value` AS STRING)) FILTER (WHERE" + + " `_row_number_transpose_` = 3) `row 3`, MAX(CAST(`value` AS STRING)) FILTER (WHERE" + + " `_row_number_transpose_` = 4) `row 4`, MAX(CAST(`value` AS STRING)) FILTER (WHERE" + + " `_row_number_transpose_` = 5) `row 5`\n" + + "FROM (SELECT `t1`.`avg_sal`, `t1`.`max_sal`, `t1`.`min_sal`, `t1`.`cnt`," + + " `t1`.`_row_number_transpose_`, `t2`.`column`, CASE WHEN `t2`.`column` = 'avg_sal'" + + " THEN NUMBER_TO_STRING(`t1`.`avg_sal`) WHEN `t2`.`column` = 'max_sal' THEN" + + " NUMBER_TO_STRING(`t1`.`max_sal`) WHEN `t2`.`column` = 'min_sal' THEN" + + " NUMBER_TO_STRING(`t1`.`min_sal`) WHEN `t2`.`column` = 'cnt' THEN CAST(`t1`.`cnt` AS" + + " STRING) ELSE NULL END `value`\n" + + "FROM (SELECT AVG(`SAL`) `avg_sal`, MAX(`SAL`) `max_sal`, MIN(`SAL`) `min_sal`," + + " COUNT(*) `cnt`, ROW_NUMBER() OVER () `_row_number_transpose_`\n" + + "FROM `scott`.`EMP`) `t1`\n" + + "CROSS JOIN (VALUES ('avg_sal'),\n" + + "('max_sal'),\n" + + "('min_sal'),\n" + + "('cnt')) `t2` (`column`)) `t3`\n" + + "WHERE `t3`.`value` IS NOT NULL\n" + + "GROUP BY TRIM(`column`)"; + + /* + "SELECT `column`, MAX(CASE WHEN `__row_id__` = 1 THEN CAST(`value` AS STRING) ELSE NULL" + + " END) `row 1`, MAX(CASE WHEN `__row_id__` = 2 THEN CAST(`value` AS STRING) ELSE NULL" + + " END) `row 2`, MAX(CASE WHEN `__row_id__` = 3 THEN CAST(`value` AS STRING) ELSE NULL" + + " END) `row 3`, MAX(CASE WHEN `__row_id__` = 4 THEN CAST(`value` AS STRING) ELSE NULL" + + " END) `row 4`, MAX(CASE WHEN `__row_id__` = 5 THEN CAST(`value` AS STRING) ELSE NULL" + + " END) `row 5`\n" + + "FROM (SELECT `t1`.`avg_sal`, `t1`.`max_sal`, `t1`.`min_sal`, `t1`.`cnt`," + + " `t1`.`__row_id__`, `t2`.`column`, CASE WHEN `t2`.`column` = 'avg_sal' THEN" + + " NUMBER_TO_STRING(`t1`.`avg_sal`) WHEN `t2`.`column` = 'max_sal' THEN" + + " NUMBER_TO_STRING(`t1`.`max_sal`) WHEN `t2`.`column` = 'min_sal' THEN" + + " NUMBER_TO_STRING(`t1`.`min_sal`) WHEN `t2`.`column` = 'cnt' THEN CAST(`t1`.`cnt` AS" + + " STRING) ELSE NULL END `value`\n" + + "FROM (SELECT AVG(`SAL`) `avg_sal`, MAX(`SAL`) `max_sal`, MIN(`SAL`) `min_sal`," + + " COUNT(*) `cnt`, ROW_NUMBER() OVER () `__row_id__`\n" + + "FROM `scott`.`EMP`) `t1`\n" + + "CROSS JOIN (VALUES ('avg_sal'),\n" + + "('max_sal'),\n" + + "('min_sal'),\n" + + "('cnt')) `t2` (`column`)) `t3`\n" + + "WHERE `t3`.`value` IS NOT NULL\n" + + "GROUP BY 
`column`"; + + */ + verifyPPLToSparkSQL(root, expectedSparkSql); + } + + @Test + public void testTransposeWithLimit() { + String ppl = "source=EMP | fields ENAME, COMM, JOB, SAL | transpose 3"; + RelNode root = getRelNode(ppl); + String expectedLogical = + "LogicalProject(column=[$0], row 1=[$1], row 2=[$2], row 3=[$3])\n" + + " LogicalAggregate(group=[{1}], row 1_null=[MAX($0) FILTER $2], row 2_null=[MAX($0)" + + " FILTER $3], row 3_null=[MAX($0) FILTER $4])\n" + + " LogicalProject(value=[CAST($6):VARCHAR NOT NULL], $f7=[TRIM(FLAG(BOTH), ' '," + + " $5)], $f8=[=($4, 1)], $f9=[=($4, 2)], $f10=[=($4, 3)])\n" + + " LogicalFilter(condition=[IS NOT NULL($6)])\n" + + " LogicalProject(ENAME=[$0], COMM=[$1], JOB=[$2], SAL=[$3]," + + " _row_number_transpose_=[$4], column=[$5], value=[CASE(=($5, 'ENAME')," + + " CAST($0):VARCHAR NOT NULL, =($5, 'COMM'), NUMBER_TO_STRING($1), =($5, 'JOB')," + + " CAST($2):VARCHAR NOT NULL, =($5, 'SAL'), NUMBER_TO_STRING($3), null:NULL)])\n" + + " LogicalJoin(condition=[true], joinType=[inner])\n" + + " LogicalProject(ENAME=[$1], COMM=[$6], JOB=[$2], SAL=[$5]," + + " _row_number_transpose_=[ROW_NUMBER() OVER ()])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n" + + " LogicalValues(tuples=[[{ 'ENAME' }, { 'COMM' }, { 'JOB' }, { 'SAL'" + + " }]])\n"; + + verifyLogical(root, expectedLogical); + String expectedResult = + "column=ENAME; row 1=SMITH; row 2=ALLEN; row 3=WARD\n" + + "column=COMM; row 1=null; row 2=300.00; row 3=500.00\n" + + "column=JOB; row 1=CLERK; row 2=SALESMAN; row 3=SALESMAN\n" + + "column=SAL; row 1=800.00; row 2=1600.00; row 3=1250.00\n"; + + verifyResult(root, expectedResult); + + String expectedSparkSql = + "SELECT TRIM(`column`) `column`, MAX(CAST(`value` AS STRING)) FILTER (WHERE" + + " `_row_number_transpose_` = 1) `row 1`, MAX(CAST(`value` AS STRING)) FILTER (WHERE" + + " `_row_number_transpose_` = 2) `row 2`, MAX(CAST(`value` AS STRING)) FILTER (WHERE" + + " `_row_number_transpose_` = 3) `row 3`\n" + + "FROM (SELECT `t`.`ENAME`, `t`.`COMM`, `t`.`JOB`, `t`.`SAL`," + + " `t`.`_row_number_transpose_`, `t0`.`column`, CASE WHEN `t0`.`column` = 'ENAME' THEN" + + " CAST(`t`.`ENAME` AS STRING) WHEN `t0`.`column` = 'COMM' THEN" + + " NUMBER_TO_STRING(`t`.`COMM`) WHEN `t0`.`column` = 'JOB' THEN CAST(`t`.`JOB` AS" + + " STRING) WHEN `t0`.`column` = 'SAL' THEN NUMBER_TO_STRING(`t`.`SAL`) ELSE NULL END" + + " `value`\n" + + "FROM (SELECT `ENAME`, `COMM`, `JOB`, `SAL`, ROW_NUMBER() OVER ()" + + " `_row_number_transpose_`\n" + + "FROM `scott`.`EMP`) `t`\n" + + "CROSS JOIN (VALUES ('ENAME'),\n" + + "('COMM'),\n" + + "('JOB'),\n" + + "('SAL')) `t0` (`column`)) `t1`\n" + + "WHERE `t1`.`value` IS NOT NULL\n" + + "GROUP BY TRIM(`column`)"; + + verifyPPLToSparkSQL(root, expectedSparkSql); + } + + @Test + public void testTransposeWithLimitColumnName() { + String ppl = + "source=EMP | fields ENAME, COMM, JOB, SAL | transpose 3 column_name='column_names'"; + RelNode root = getRelNode(ppl); + String expectedLogical = + "LogicalProject(column_names=[$0], row 1=[$1], row 2=[$2], row 3=[$3])\n" + + " LogicalAggregate(group=[{1}], row 1_null=[MAX($0) FILTER $2], row 2_null=[MAX($0)" + + " FILTER $3], row 3_null=[MAX($0) FILTER $4])\n" + + " LogicalProject(value=[CAST($6):VARCHAR NOT NULL], $f7=[TRIM(FLAG(BOTH), ' '," + + " $5)], $f8=[=($4, 1)], $f9=[=($4, 2)], $f10=[=($4, 3)])\n" + + " LogicalFilter(condition=[IS NOT NULL($6)])\n" + + " LogicalProject(ENAME=[$0], COMM=[$1], JOB=[$2], SAL=[$3]," + + " _row_number_transpose_=[$4], column_names=[$5], value=[CASE(=($5, 
'ENAME')," + + " CAST($0):VARCHAR NOT NULL, =($5, 'COMM'), NUMBER_TO_STRING($1), =($5, 'JOB')," + + " CAST($2):VARCHAR NOT NULL, =($5, 'SAL'), NUMBER_TO_STRING($3), null:NULL)])\n" + + " LogicalJoin(condition=[true], joinType=[inner])\n" + + " LogicalProject(ENAME=[$1], COMM=[$6], JOB=[$2], SAL=[$5]," + + " _row_number_transpose_=[ROW_NUMBER() OVER ()])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n" + + " LogicalValues(tuples=[[{ 'ENAME' }, { 'COMM' }, { 'JOB' }, { 'SAL'" + + " }]])\n"; + + verifyLogical(root, expectedLogical); + String expectedResult = + "column_names=ENAME; row 1=SMITH; row 2=ALLEN; row 3=WARD\n" + + "column_names=COMM; row 1=null; row 2=300.00; row 3=500.00\n" + + "column_names=JOB; row 1=CLERK; row 2=SALESMAN; row 3=SALESMAN\n" + + "column_names=SAL; row 1=800.00; row 2=1600.00; row 3=1250.00\n"; + verifyResult(root, expectedResult); + + String expectedSparkSql = + "SELECT TRIM(`column_names`) `column_names`, MAX(CAST(`value` AS STRING)) FILTER (WHERE" + + " `_row_number_transpose_` = 1) `row 1`, MAX(CAST(`value` AS STRING)) FILTER (WHERE" + + " `_row_number_transpose_` = 2) `row 2`, MAX(CAST(`value` AS STRING)) FILTER (WHERE" + + " `_row_number_transpose_` = 3) `row 3`\n" + + "FROM (SELECT `t`.`ENAME`, `t`.`COMM`, `t`.`JOB`, `t`.`SAL`," + + " `t`.`_row_number_transpose_`, `t0`.`column_names`, CASE WHEN `t0`.`column_names` =" + + " 'ENAME' THEN CAST(`t`.`ENAME` AS STRING) WHEN `t0`.`column_names` = 'COMM' THEN" + + " NUMBER_TO_STRING(`t`.`COMM`) WHEN `t0`.`column_names` = 'JOB' THEN CAST(`t`.`JOB`" + + " AS STRING) WHEN `t0`.`column_names` = 'SAL' THEN NUMBER_TO_STRING(`t`.`SAL`) ELSE" + + " NULL END `value`\n" + + "FROM (SELECT `ENAME`, `COMM`, `JOB`, `SAL`, ROW_NUMBER() OVER ()" + + " `_row_number_transpose_`\n" + + "FROM `scott`.`EMP`) `t`\n" + + "CROSS JOIN (VALUES ('ENAME'),\n" + + "('COMM'),\n" + + "('JOB'),\n" + + "('SAL')) `t0` (`column_names`)) `t1`\n" + + "WHERE `t1`.`value` IS NOT NULL\n" + + "GROUP BY TRIM(`column_names`)"; + + verifyPPLToSparkSQL(root, expectedSparkSql); + } +} diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/parser/FieldResolutionVisitorTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/parser/FieldResolutionVisitorTest.java index a4d4f4874ad..3418411f9c5 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/parser/FieldResolutionVisitorTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/parser/FieldResolutionVisitorTest.java @@ -172,6 +172,16 @@ public void testMultiRelationResult() { assertEquals("*", result.getWildcard().toString()); } + @Test + public void testMvCombineAddsTargetFieldToRequirements() { + assertSingleRelationFields("source=logs | mvcombine packets_str", Set.of("packets_str"), "*"); + } + + @Test + public void testMvCombineAddsWildcard() { + assertSingleRelationFields("source=logs | mvcombine packets_str", Set.of("packets_str"), "*"); + } + @Test public void testSimpleJoin() { assertMultiRelationFields( @@ -371,6 +381,38 @@ public void testAppend() { "sub", new FieldResolutionResult(Set.of("a", "c", "testCase"), "*"))); } + @Test + public void testAppendCol() { + String query = + "source=main | where testCase='simple' | eval c = 4 | " + + "appendcol [where testCase='simple' ] | fields a, c, *"; + assertMultiRelationFields( + query, Map.of("main", new FieldResolutionResult(Set.of("a", "testCase"), "*"))); + } + + @Test + public void testAppendpipe() { + String query = + "source=main | where testCase='simple' | stats sum(a) as sum_a by b | " + + "appendpipe [stats sum(sum_a) as total] | head 5"; + 
assertMultiRelationFields( + query, Map.of("main", new FieldResolutionResult(Set.of("a", "b", "testCase")))); + } + + @Test + public void testMultisearch() { + String query = + "| multisearch [source=main | where testCase='simple'] [source=sub | where" + + " testCase='simple'] | fields a, c, *"; + assertMultiRelationFields( + query, + Map.of( + "main", + new FieldResolutionResult(Set.of("a", "c", "testCase"), "*"), + "sub", + new FieldResolutionResult(Set.of("a", "c", "testCase"), "*"))); + } + @Test public void testAppendWithSpathInMain() { String query = diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizerTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizerTest.java index 2fd08988f6b..1e200eb092b 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizerTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizerTest.java @@ -247,6 +247,13 @@ public void testDedupCommand() { anonymize("source=t | dedup f1, f2")); } + @Test + public void testTransposeCommand() { + assertEquals( + "source=table | transpose 5 column_name=***", + anonymize("source=t | transpose 5 column_name='column_names'")); + } + @Test public void testTrendlineCommand() { assertEquals( @@ -1010,4 +1017,17 @@ public void testMvfind() { "source=t | eval result=mvfind(array('apple', 'banana', 'apricot'), 'ban.*') | fields" + " result")); } + + @Test + public void testMvcombineCommand() { + assertEquals( + "source=table | mvcombine delim=*** identifier", anonymize("source=t | mvcombine age")); + } + + @Test + public void testMvcombineCommandWithDelim() { + assertEquals( + "source=table | mvcombine delim=*** identifier", + anonymize("source=t | mvcombine age delim=','")); + } } diff --git a/release-notes/opensearch-sql.release-notes-3.5.0.0.md b/release-notes/opensearch-sql.release-notes-3.5.0.0.md new file mode 100644 index 00000000000..03dcc77e70c --- /dev/null +++ b/release-notes/opensearch-sql.release-notes-3.5.0.0.md @@ -0,0 +1,77 @@ +## Version 3.5.0 Release Notes + +Compatible with OpenSearch and OpenSearch Dashboards version 3.5.0 + +### Features +* Feature tonumber : issue #4514 tonumber function as part of roadmap #4287 ([#4605](https://github.com/opensearch-project/sql/pull/4605)) +* Feature addtotals and addcoltotals ([#4754](https://github.com/opensearch-project/sql/pull/4754)) +* Support `mvzip` eval function ([#4805](https://github.com/opensearch-project/sql/pull/4805)) +* Support `split` eval function ([#4814](https://github.com/opensearch-project/sql/pull/4814)) +* Support `mvfind` eval function ([#4839](https://github.com/opensearch-project/sql/pull/4839)) +* Support `mvmap` eval function ([#4856](https://github.com/opensearch-project/sql/pull/4856)) +* [Feature] implement transpose command as in the roadmap #4786 ([#5011](https://github.com/opensearch-project/sql/pull/5011)) +* Feature/mvcombine ([#5025](https://github.com/opensearch-project/sql/pull/5025)) +* Implement spath command with field resolution ([#5028](https://github.com/opensearch-project/sql/pull/5028)) + +### Enhancements +* ML command supports category_field parameter ([#3909](https://github.com/opensearch-project/sql/pull/3909)) +* Time Unit Unification for bin/stats ([#4450](https://github.com/opensearch-project/sql/pull/4450)) +* Enhance doc and error message handling for `bins` on time-related fields ([#4713](https://github.com/opensearch-project/sql/pull/4713)) +* Push down filters on nested fields as nested queries 
+* Support sort expression pushdown for SortMergeJoin ([#4830](https://github.com/opensearch-project/sql/pull/4830))
+* Add unified query transpiler API ([#4871](https://github.com/opensearch-project/sql/pull/4871))
+* Push down join with `max=n` option to TopHits aggregation ([#4929](https://github.com/opensearch-project/sql/pull/4929))
+* Support pushdown of dedup with expressions ([#4957](https://github.com/opensearch-project/sql/pull/4957))
+* Add scalar min/max to BuiltinFunctionName ([#4967](https://github.com/opensearch-project/sql/pull/4967))
+* Add unified query compiler API ([#4974](https://github.com/opensearch-project/sql/pull/4974))
+* Support nested aggregation when Calcite is enabled ([#4979](https://github.com/opensearch-project/sql/pull/4979))
+* Support profile option for PPL - Part I: Implement phase-level metrics ([#4983](https://github.com/opensearch-project/sql/pull/4983))
+* Dedup pushdown (TopHits aggregation) should work with object fields ([#4991](https://github.com/opensearch-project/sql/pull/4991))
+* Support enumerable TopK ([#4993](https://github.com/opensearch-project/sql/pull/4993))
+* Prune old `IN` operator pushdown rules ([#4992](https://github.com/opensearch-project/sql/pull/4992))
+* RexCall and RelDataType standardization for script pushdown ([#4914](https://github.com/opensearch-project/sql/pull/4914))
+* Introduce logical dedup operators for PPL ([#5014](https://github.com/opensearch-project/sql/pull/5014))
+* Support reading multi-values from OpenSearch when no codegen is triggered ([#5015](https://github.com/opensearch-project/sql/pull/5015))
+* Add unified function interface with function discovery API ([#5039](https://github.com/opensearch-project/sql/pull/5039))
+* Support profile option for PPL - Part II: Implement operator-level metrics ([#5044](https://github.com/opensearch-project/sql/pull/5044))
+* Support spath with dynamic fields ([#5058](https://github.com/opensearch-project/sql/pull/5058))
+* Adapt `appendcol`, `appendpipe`, and `multisearch` to `spath` ([#5075](https://github.com/opensearch-project/sql/pull/5075))
+* Set `max=1` in join as the default when `plugins.ppl.syntax.legacy.preferred=false` ([#5057](https://github.com/opensearch-project/sql/pull/5057))
+* Add OUTPUT as an alias for REPLACE in Lookup ([#5049](https://github.com/opensearch-project/sql/pull/5049))
+* Separate explain mode from format params ([#5042](https://github.com/opensearch-project/sql/pull/5042))
+
+### Bug Fixes
+* Error handling for dot-containing field names ([#4907](https://github.com/opensearch-project/sql/pull/4907))
+* Replace duplicated aggregation logic with aggregateWithTrimming() ([#4926](https://github.com/opensearch-project/sql/pull/4926))
+* Remove GetAlias call ([#4981](https://github.com/opensearch-project/sql/pull/4981))
+* Fix PIT context leak in Legacy SQL for non-paginated queries ([#5009](https://github.com/opensearch-project/sql/pull/5009))
+* [BugFix] NOT BETWEEN should use range query ([#5016](https://github.com/opensearch-project/sql/pull/5016))
+* Move Calcite-only tests from CrossClusterSearchIT to CalciteCrossClusterSearchIT ([#5085](https://github.com/opensearch-project/sql/pull/5085))
+
+### Infrastructure
+* Add workflow for SQL CLI integration tests ([#4770](https://github.com/opensearch-project/sql/pull/4770))
+* Remove access controller step in Calcite script ([#4900](https://github.com/opensearch-project/sql/pull/4900))
+* Adjust CodeRabbit review config ([#4901](https://github.com/opensearch-project/sql/pull/4901))
+* Add micro benchmarks for unified query layer ([#5043](https://github.com/opensearch-project/sql/pull/5043))
+* Improve CodeRabbit config ([#5048](https://github.com/opensearch-project/sql/pull/5048))
+* Update CodeRabbit instructions ([#4962](https://github.com/opensearch-project/sql/pull/4962))
+* Add feedback reminder for CodeRabbit ([#4932](https://github.com/opensearch-project/sql/pull/4932))
+
+### Documentation
+* Migrate PPL documentation from RST to Markdown ([#4912](https://github.com/opensearch-project/sql/pull/4912))
+* [DOC] Call out that the aggregation result may be approximate ([#4922](https://github.com/opensearch-project/sql/pull/4922))
+* Show backticks in testing-doctest.md ([#4941](https://github.com/opensearch-project/sql/pull/4941))
+* Escape underscore character in documentation for LIKE ([#4958](https://github.com/opensearch-project/sql/pull/4958))
+* Apply feedback from documentation-website to PPL command docs ([#4997](https://github.com/opensearch-project/sql/pull/4997))
+* Add PPL docs website exporter script ([#4950](https://github.com/opensearch-project/sql/pull/4950))
+* Add version numbers for all settings in the docs ([#5019](https://github.com/opensearch-project/sql/pull/5019))
+* Add legacy PPL index.rst for backwards compatibility ([#5026](https://github.com/opensearch-project/sql/pull/5026))
+* Add index.md for PPL functions documentation ([#5033](https://github.com/opensearch-project/sql/pull/5033))
+
+### Maintenance
+* Remove all AccessController refs ([#4924](https://github.com/opensearch-project/sql/pull/4924))
+* Extract unified query context for shared config management ([#4933](https://github.com/opensearch-project/sql/pull/4933))
+* Remove shadow jar task from build file ([#4955](https://github.com/opensearch-project/sql/pull/4955))
+* Add frequently used Big5 PPL queries ([#4976](https://github.com/opensearch-project/sql/pull/4976))
+* Increment version to 3.5.0 ([#5040](https://github.com/opensearch-project/sql/pull/5040))
+* Upgrade assertj-core to 3.27.7 ([#5100](https://github.com/opensearch-project/sql/pull/5100))
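To make the transpose expectations concrete: the Calcite test above turns the first three `scott.EMP` rows into one output row per selected column. A sketch of that transformation follows; the PPL line is an assumption reconstructed from the logical plan, since the test's actual source query lies outside this hunk:

    source=EMP | fields ENAME, COMM, JOB, SAL | transpose 3    (assumed query shape)

    column_names | row 1  | row 2    | row 3
    -------------+--------+----------+----------
    ENAME        | SMITH  | ALLEN    | WARD
    COMM         | null   | 300.00   | 500.00
    JOB          | CLERK  | SALESMAN | SALESMAN
    SAL          | 800.00 | 1600.00  | 1250.00

The companion `mvcombine` command takes a single field and an optional `delim` argument, as exercised in the parser and anonymizer tests: `source=logs | mvcombine packets_str` and `source=t | mvcombine age delim=','`.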