From 642d1448515dadf0d006f73888945934e1acc6a7 Mon Sep 17 00:00:00 2001 From: Asif Bashar Date: Tue, 16 Dec 2025 15:29:57 -0800 Subject: [PATCH 01/24] transpose command implementation Signed-off-by: Asif Bashar --- .../org/opensearch/sql/analysis/Analyzer.java | 6 ++ .../sql/ast/AbstractNodeVisitor.java | 5 + .../opensearch/sql/ast/tree/Transpose.java | 43 ++++++++ .../sql/calcite/CalciteRelNodeVisitor.java | 97 +++++++++++++++++++ ppl/src/main/antlr/OpenSearchPPLLexer.g4 | 5 + ppl/src/main/antlr/OpenSearchPPLParser.g4 | 17 ++++ .../opensearch/sql/ppl/parser/AstBuilder.java | 9 ++ .../sql/ppl/utils/ArgumentFactory.java | 34 ++++++- 8 files changed, 215 insertions(+), 1 deletion(-) create mode 100644 core/src/main/java/org/opensearch/sql/ast/tree/Transpose.java diff --git a/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java b/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java index 24cef144c97..5a3d2b6070f 100644 --- a/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java +++ b/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java @@ -99,6 +99,7 @@ import org.opensearch.sql.ast.tree.StreamWindow; import org.opensearch.sql.ast.tree.SubqueryAlias; import org.opensearch.sql.ast.tree.TableFunction; +import org.opensearch.sql.ast.tree.Transpose; import org.opensearch.sql.ast.tree.Trendline; import org.opensearch.sql.ast.tree.UnresolvedPlan; import org.opensearch.sql.ast.tree.Values; @@ -703,6 +704,11 @@ public LogicalPlan visitML(ML node, AnalysisContext context) { return new LogicalML(child, node.getArguments()); } + @Override + public LogicalPlan visitTranspose(Transpose node, AnalysisContext context) { + throw getOnlyForCalciteException("Transpose"); + } + @Override public LogicalPlan visitBin(Bin node, AnalysisContext context) { throw getOnlyForCalciteException("Bin"); diff --git a/core/src/main/java/org/opensearch/sql/ast/AbstractNodeVisitor.java b/core/src/main/java/org/opensearch/sql/ast/AbstractNodeVisitor.java index a6ef5e7547a..be5bce69032 100644 --- a/core/src/main/java/org/opensearch/sql/ast/AbstractNodeVisitor.java +++ b/core/src/main/java/org/opensearch/sql/ast/AbstractNodeVisitor.java @@ -86,6 +86,7 @@ import org.opensearch.sql.ast.tree.StreamWindow; import org.opensearch.sql.ast.tree.SubqueryAlias; import org.opensearch.sql.ast.tree.TableFunction; +import org.opensearch.sql.ast.tree.Transpose; import org.opensearch.sql.ast.tree.Trendline; import org.opensearch.sql.ast.tree.Values; import org.opensearch.sql.ast.tree.Window; @@ -282,6 +283,10 @@ public T visitReverse(Reverse node, C context) { return visitChildren(node, context); } + public T visitTranspose(Transpose node, C context) { + return visitChildren(node, context); + } + public T visitChart(Chart node, C context) { return visitChildren(node, context); } diff --git a/core/src/main/java/org/opensearch/sql/ast/tree/Transpose.java b/core/src/main/java/org/opensearch/sql/ast/tree/Transpose.java new file mode 100644 index 00000000000..3c0c9ebe05d --- /dev/null +++ b/core/src/main/java/org/opensearch/sql/ast/tree/Transpose.java @@ -0,0 +1,43 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.ast.tree; + +import com.google.common.collect.ImmutableList; +import java.util.List; +import java.util.Map; + +import lombok.*; +import org.opensearch.sql.ast.AbstractNodeVisitor; +import org.opensearch.sql.ast.expression.Argument; +import org.opensearch.sql.ast.expression.Literal; + +/** AST node represent Transpose operation. 
*/ + + +@Getter +@Setter +@ToString +@EqualsAndHashCode(callSuper = false) +@RequiredArgsConstructor +public class Transpose extends UnresolvedPlan { + private final List arguments; + private UnresolvedPlan child; + @Override + public Transpose attach(UnresolvedPlan child) { + this.child = child; + return this; + } + + @Override + public List getChild() { + return this.child == null ? ImmutableList.of() : ImmutableList.of(this.child); + } + + @Override + public T accept(AbstractNodeVisitor nodeVisitor, C context) { + return nodeVisitor.visitTranspose(this, context); + } +} diff --git a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java index 097bc81cfed..6529d13429f 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java +++ b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java @@ -696,6 +696,103 @@ public RelNode visitReverse( return context.relBuilder.peek(); } + @Override + public RelNode visitTranspose( + org.opensearch.sql.ast.tree.Transpose node, CalcitePlanContext context) { + visitChildren(node, context); + + // Get the current schema to transpose + RelNode currentNode = context.relBuilder.peek(); + List fieldNames = currentNode.getRowType().getFieldNames(); + + if (fieldNames.isEmpty()) { + return currentNode; + } + + // Create a list to hold all transpose rows + List transposeRows = new ArrayList<>(); + + // Create column header mapping: "column", "row1", "row2", etc. + List outputColumns = new ArrayList<>(); + outputColumns.add("column"); // First column will contain original column names + + // Add row headers (row1, row2, etc.) based on number of rows in input + // For now, we'll use a fixed approach - in real usage this would depend on actual data + // Add enough columns to handle the transposition + for (int i = 1; i <= Math.max(10, fieldNames.size()); i++) { + outputColumns.add("row" + i); + } + + // For each original column, create a row in the transposed result + for (int colIndex = 0; colIndex < fieldNames.size(); colIndex++) { + String originalColumnName = fieldNames.get(colIndex); + + // Create values for this transposed row + List rowValues = new ArrayList<>(); + + // First value is the original column name + rowValues.add(context.rexBuilder.makeLiteral(originalColumnName)); + + // Add placeholder values for now (in a real implementation, these would be actual data + // values) + // This is a simplified approach - the actual implementation would need to: + // 1. Collect all rows from the input + // 2. Extract values from the specific column across all rows + // 3. 
Place those values as the new row values + for (int rowIndex = 1; rowIndex < outputColumns.size(); rowIndex++) { + RexNode placeholder = + context.rexBuilder.makeNullLiteral( + context.rexBuilder.getTypeFactory().createSqlType(SqlTypeName.VARCHAR)); + rowValues.add(placeholder); + } + + // Create a Values node for this row + RelDataType rowType = + context + .rexBuilder + .getTypeFactory() + .createStructType( + outputColumns.stream() + .map( + name -> + context + .rexBuilder + .getTypeFactory() + .createSqlType(SqlTypeName.VARCHAR)) + .collect(Collectors.toList()), + outputColumns); + + // For now, create a simple projection that represents the transposed structure + // This is a placeholder implementation + } + + // Simplified implementation: just return a projection with renamed columns + // indicating the transpose operation structure + List projections = new ArrayList<>(); + List newFieldNames = new ArrayList<>(); + + // Create a single row showing the transpose concept + projections.add(context.rexBuilder.makeLiteral("column_names")); + newFieldNames.add("column"); + + // Add original field names as row data + for (int i = 0; i < Math.min(fieldNames.size(), 10); i++) { + if (i < fieldNames.size()) { + projections.add(context.rexBuilder.makeLiteral(fieldNames.get(i))); + } else { + projections.add( + context.rexBuilder.makeNullLiteral( + context.rexBuilder.getTypeFactory().createSqlType(SqlTypeName.VARCHAR))); + } + newFieldNames.add("row" + (i + 1)); + } + + // Create the final projection + context.relBuilder.project(projections, newFieldNames); + + return context.relBuilder.peek(); + } + @Override public RelNode visitBin(Bin node, CalcitePlanContext context) { visitChildren(node, context); diff --git a/ppl/src/main/antlr/OpenSearchPPLLexer.g4 b/ppl/src/main/antlr/OpenSearchPPLLexer.g4 index 71162e81bd8..69fb267db11 100644 --- a/ppl/src/main/antlr/OpenSearchPPLLexer.g4 +++ b/ppl/src/main/antlr/OpenSearchPPLLexer.g4 @@ -46,6 +46,7 @@ ML: 'ML'; FILLNULL: 'FILLNULL'; FLATTEN: 'FLATTEN'; TRENDLINE: 'TRENDLINE'; +TRANSPOSE: 'TRANSPOSE'; CHART: 'CHART'; TIMECHART: 'TIMECHART'; APPENDCOL: 'APPENDCOL'; @@ -156,6 +157,10 @@ TIMEFIELD: 'TIMEFIELD'; INPUT: 'INPUT'; OUTPUT: 'OUTPUT'; PATH: 'PATH'; +COLUMN_NAME: 'COLUMN_NAME'; +HEADER_FIELD: 'HEADER_FIELD'; +INCLUDE_EMPTY: 'INCLUDE_EMPTY'; + // COMPARISON FUNCTION KEYWORDS CASE: 'CASE'; diff --git a/ppl/src/main/antlr/OpenSearchPPLParser.g4 b/ppl/src/main/antlr/OpenSearchPPLParser.g4 index 7045796a03c..c12ef9a4520 100644 --- a/ppl/src/main/antlr/OpenSearchPPLParser.g4 +++ b/ppl/src/main/antlr/OpenSearchPPLParser.g4 @@ -85,6 +85,7 @@ commands | regexCommand | chartCommand | timechartCommand + | transposeCommand | rexCommand | appendPipeCommand | replaceCommand @@ -328,6 +329,18 @@ timechartCommand : TIMECHART timechartParameter* statsAggTerm (BY fieldExpression)? 
timechartParameter* ; +transposeCommand + : TRANSPOSE transposeParameter* + ; + +transposeParameter + : number = integerLiteral + | COLUMN_NAME EQUAL stringLiteral + | HEADER_FIELD EQUAL stringLiteral + | INCLUDE_EMPTY EQUAL booleanLiteral + ; + + timechartParameter : LIMIT EQUAL integerLiteral | SPAN EQUAL spanLiteral @@ -1657,5 +1670,9 @@ searchableKeyWord | FIELDNAME | ROW | COL + | TRANSPOSE + | COLUMN_NAME + | HEADER_FIELD + | INCLUDE_EMPTY ; diff --git a/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java b/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java index 3f4f3049365..27d91b3cd8d 100644 --- a/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java +++ b/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java @@ -111,6 +111,7 @@ import org.opensearch.sql.ast.tree.StreamWindow; import org.opensearch.sql.ast.tree.SubqueryAlias; import org.opensearch.sql.ast.tree.TableFunction; +import org.opensearch.sql.ast.tree.Transpose; import org.opensearch.sql.ast.tree.Trendline; import org.opensearch.sql.ast.tree.UnresolvedPlan; import org.opensearch.sql.ast.tree.Window; @@ -736,6 +737,14 @@ public UnresolvedPlan visitReverseCommand(OpenSearchPPLParser.ReverseCommandCont return new Reverse(); } + /** Transpose command. */ + @Override + public UnresolvedPlan visitTransposeCommand(OpenSearchPPLParser.TransposeCommandContext ctx) { + List arguments = ArgumentFactory.getArgumentList(ctx); + return new Transpose( arguments); + + } + /** Chart command. */ @Override public UnresolvedPlan visitChartCommand(OpenSearchPPLParser.ChartCommandContext ctx) { diff --git a/ppl/src/main/java/org/opensearch/sql/ppl/utils/ArgumentFactory.java b/ppl/src/main/java/org/opensearch/sql/ppl/utils/ArgumentFactory.java index ed76b29b77a..c5fb7cbf352 100644 --- a/ppl/src/main/java/org/opensearch/sql/ppl/utils/ArgumentFactory.java +++ b/ppl/src/main/java/org/opensearch/sql/ppl/utils/ArgumentFactory.java @@ -308,7 +308,39 @@ public static List getArgumentList( return arguments; } - /** + public static List getArgumentList( + OpenSearchPPLParser.TransposeCommandContext transposeCommandContext) { + List arguments = new ArrayList<>(); + for (OpenSearchPPLParser.TransposeParameterContext ctx : transposeCommandContext.transposeParameter()) { + + if (ctx.COLUMN_NAME() != null) { + Literal columnName = getArgumentValue(ctx.stringLiteral()); + arguments.add(new Argument("columnName", columnName)); + } else if (ctx.HEADER_FIELD() != null) { + Literal headerField = getArgumentValue(ctx.stringLiteral()); + arguments.add(new Argument("headerField", headerField)); + } else if (ctx.INCLUDE_EMPTY() != null) { + Literal includeEmpty; + if (ctx.booleanLiteral() != null) { + includeEmpty = getArgumentValue(ctx.booleanLiteral()); + }else { + throw new IllegalArgumentException("value for includeEmpty must be a boolean"); + } + arguments.add(new Argument("includeEmpty", includeEmpty)); + } else if (ctx.number !=null ) { + + arguments.add(new Argument("number",getArgumentValue(ctx.number))); + }else { + throw new IllegalArgumentException( + String.format( + "A parameter of transpose must be a int limit, column_name, header_field, or include_empty, got %s", + ctx)); + } + } + return arguments; + } + + /** * Get list of {@link Argument}. 
* * @param ctx RareCommandContext instance From 98a0cc850cbf31a8288ec9984347efdf9c60ba8e Mon Sep 17 00:00:00 2001 From: Asif Bashar Date: Sat, 27 Dec 2025 15:40:04 -0800 Subject: [PATCH 02/24] transpose rows to columns Signed-off-by: Asif Bashar --- .../opensearch/sql/ast/tree/Transpose.java | 24 +- .../sql/calcite/CalciteRelNodeVisitor.java | 227 +++++++++++------- docs/category.json | 1 + docs/user/ppl/cmd/transpose.md | 92 +++++++ ppl/src/main/antlr/OpenSearchPPLLexer.g4 | 4 +- ppl/src/main/antlr/OpenSearchPPLParser.g4 | 9 +- .../opensearch/sql/ppl/parser/AstBuilder.java | 5 +- .../sql/ppl/utils/ArgumentFactory.java | 49 ++-- 8 files changed, 278 insertions(+), 133 deletions(-) create mode 100644 docs/user/ppl/cmd/transpose.md diff --git a/core/src/main/java/org/opensearch/sql/ast/tree/Transpose.java b/core/src/main/java/org/opensearch/sql/ast/tree/Transpose.java index 3c0c9ebe05d..1a31eef24f4 100644 --- a/core/src/main/java/org/opensearch/sql/ast/tree/Transpose.java +++ b/core/src/main/java/org/opensearch/sql/ast/tree/Transpose.java @@ -7,24 +7,36 @@ import com.google.common.collect.ImmutableList; import java.util.List; -import java.util.Map; - import lombok.*; import org.opensearch.sql.ast.AbstractNodeVisitor; import org.opensearch.sql.ast.expression.Argument; -import org.opensearch.sql.ast.expression.Literal; /** AST node represent Transpose operation. */ - - @Getter @Setter @ToString @EqualsAndHashCode(callSuper = false) @RequiredArgsConstructor public class Transpose extends UnresolvedPlan { - private final List arguments; + private final java.util.Map arguments; private UnresolvedPlan child; + + public Integer getMaxRows(){ + Integer maxRows= 5; + if (arguments.containsKey("number")) { + maxRows = Integer.parseInt(arguments.get("number").getValue().toString()); + } + return maxRows; + } + + public String getColumnName() { + String columnName = "column"; + if (arguments.containsKey("columnName")) { + columnName = arguments.get("columnName").getValue().toString(); + } + return columnName; + } + @Override public Transpose attach(UnresolvedPlan child) { this.child = child; diff --git a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java index 6529d13429f..73fcdad4760 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java +++ b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java @@ -29,6 +29,11 @@ import com.google.common.collect.ImmutableList; import com.google.common.collect.Iterables; import com.google.common.collect.Streams; +import java.math.BigDecimal; +import java.sql.PreparedStatement; +import java.sql.ResultSet; +import java.sql.SQLException; +import java.util.AbstractMap; import java.util.ArrayList; import java.util.Arrays; import java.util.BitSet; @@ -69,6 +74,8 @@ import org.apache.calcite.sql.type.SqlTypeUtil; import org.apache.calcite.tools.RelBuilder; import org.apache.calcite.tools.RelBuilder.AggCall; +import org.apache.calcite.tools.RelRunner; +import org.apache.calcite.tools.RelRunners; import org.apache.calcite.util.Holder; import org.apache.commons.lang3.ArrayUtils; import org.apache.commons.lang3.tuple.Pair; @@ -171,7 +178,7 @@ public class CalciteRelNodeVisitor extends AbstractNodeVisitor fieldNames = currentNode.getRowType().getFieldNames(); - - if (fieldNames.isEmpty()) { - return currentNode; - } + @Override + public RelNode visitTranspose( + org.opensearch.sql.ast.tree.Transpose node, CalcitePlanContext context) { 
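+    // Overall approach, in three steps:
+    //   1. Tag each input row with ROW_NUMBER() as __row_id__ so rows keep a stable
+    //      identity through the reshaping.
+    //   2. Unpivot all columns into (__row_id__, column, value) triples, casting each
+    //      value to VARCHAR so heterogeneous column types share a single value column.
+    //   3. Group by the column name and emit one MAX(CASE WHEN __row_id__ = i THEN
+    //      value END) aggregate per requested output row, up to maxRows.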
+ visitChildren(node, context); + java.util.Map arguments = node.getArguments(); + Integer maxRows= node.getMaxRows(); - // Create a list to hold all transpose rows - List transposeRows = new ArrayList<>(); + String columnName = node.getColumnName(); - // Create column header mapping: "column", "row1", "row2", etc. - List outputColumns = new ArrayList<>(); - outputColumns.add("column"); // First column will contain original column names + // Get the current schema to transpose + RelNode currentNode = context.relBuilder.peek(); + List fieldNames = currentNode.getRowType().getFieldNames(); + List fields = currentNode.getRowType().getFieldList(); - // Add row headers (row1, row2, etc.) based on number of rows in input - // For now, we'll use a fixed approach - in real usage this would depend on actual data - // Add enough columns to handle the transposition - for (int i = 1; i <= Math.max(10, fieldNames.size()); i++) { - outputColumns.add("row" + i); - } + if (fieldNames.isEmpty()) { + return currentNode; + } - // For each original column, create a row in the transposed result - for (int colIndex = 0; colIndex < fieldNames.size(); colIndex++) { - String originalColumnName = fieldNames.get(colIndex); + // Step 1: Add row numbers to identify each row uniquely + RexNode rowNumber = + context + .relBuilder + .aggregateCall(SqlStdOperatorTable.ROW_NUMBER) + .over() + .rowsTo(RexWindowBounds.CURRENT_ROW) + .as("__row_id__"); + context.relBuilder.projectPlus(rowNumber); + + // Step 2: Unpivot the data - convert columns to rows + // Each field becomes a row with: row_id, column, value + List measureColumns = ImmutableList.of("value"); + List axisColumns = ImmutableList.of(columnName); + + // Create the unpivot value mappings + List, List>> valueMappings = new ArrayList<>(); + RelDataType varcharType = + context.rexBuilder.getTypeFactory().createSqlType(SqlTypeName.VARCHAR); + + for (String fieldName : fieldNames) { + if (fieldName.equals("__row_id__")) { + continue; // Skip the row number column + } - // Create values for this transposed row - List rowValues = new ArrayList<>(); + // Create the axis value (column name as literal) + RexLiteral columnNameLiteral = context.rexBuilder.makeLiteral(fieldName); + List axisValues = ImmutableList.of(columnNameLiteral); - // First value is the original column name - rowValues.add(context.rexBuilder.makeLiteral(originalColumnName)); + // Create the measure value (field expression cast to VARCHAR) + RexNode fieldValue = context.relBuilder.field(fieldName); + RexNode castValue = context.rexBuilder.makeCast(varcharType, fieldValue, true); + List measureValues = ImmutableList.of(castValue); - // Add placeholder values for now (in a real implementation, these would be actual data - // values) - // This is a simplified approach - the actual implementation would need to: - // 1. Collect all rows from the input - // 2. Extract values from the specific column across all rows - // 3. 
Place those values as the new row values - for (int rowIndex = 1; rowIndex < outputColumns.size(); rowIndex++) { - RexNode placeholder = - context.rexBuilder.makeNullLiteral( - context.rexBuilder.getTypeFactory().createSqlType(SqlTypeName.VARCHAR)); - rowValues.add(placeholder); - } + // Create the mapping entry + valueMappings.add(new AbstractMap.SimpleEntry<>(axisValues, measureValues)); + } - // Create a Values node for this row - RelDataType rowType = - context - .rexBuilder - .getTypeFactory() - .createStructType( - outputColumns.stream() - .map( - name -> - context - .rexBuilder - .getTypeFactory() - .createSqlType(SqlTypeName.VARCHAR)) - .collect(Collectors.toList()), - outputColumns); - - // For now, create a simple projection that represents the transposed structure - // This is a placeholder implementation - } - - // Simplified implementation: just return a projection with renamed columns - // indicating the transpose operation structure - List projections = new ArrayList<>(); - List newFieldNames = new ArrayList<>(); + // Apply the unpivot operation + context.relBuilder.unpivot( + false, // includeNulls = false + measureColumns, // measure column names: ["value"] + axisColumns, // axis column names: ["column"] + valueMappings // field mappings + ); + + // Step 3: Pivot the data to transpose rows as columns + // Pivot on __row_id__ with column as the grouping key + // This creates: column, row1, row2, row3, ... + + // Get unique row IDs to create column names + RelNode unpivotedData = context.relBuilder.build(); + + // Create aggregation calls for each row - we'll use MAX since we know each cell has only one + // value + List pivotColumns = new ArrayList<>(); + List pivotValues = new ArrayList<>(); + + // We need to determine how many rows we have to create the proper pivot structure + // For now, we'll use a different approach - use conditional aggregation + + // Get the unpivoted data back on stack + context.relBuilder.push(unpivotedData); + + // Create conditional aggregations for each row position + // We'll use ROW_NUMBER to determine the row positions dynamically + RexNode rowPos = + context + .relBuilder + .aggregateCall(SqlStdOperatorTable.ROW_NUMBER) + .over() + .partitionBy(context.relBuilder.field(columnName)) + .orderBy(context.relBuilder.field("__row_id__")) + .rowsTo(RexWindowBounds.CURRENT_ROW) + .as("__row_pos__"); + context.relBuilder.projectPlus(rowPos); + + // Now we'll use a different strategy - collect all values per column and then split them + // Group by column and collect all values in order + List groupByKeys = ImmutableList.of(context.relBuilder.field(columnName)); + + // For simplicity, we'll use a direct approach with conditional aggregations + // instead of STRING_AGG which may not be available + + // For simplicity, let's use a manual approach that works with the available Calcite operations + // We'll create a query that manually builds the transposed result + + // Reset and try a simpler approach + context.relBuilder.push(unpivotedData); + + // Let's manually build the pivot by creating conditional aggregations + // First, get distinct row IDs + context.relBuilder.aggregate( + context.relBuilder.groupKey(), + context.relBuilder.max(context.relBuilder.field("__row_id__")).as("max_row_id")); + + + + // Go back to unpivoted data and create the pivot manually + context.relBuilder.clear(); + context.relBuilder.push(unpivotedData); + + // Create aggregation calls for each possible row position + List pivotAggCalls = new ArrayList<>(); + + for (int i = 
1; i <= maxRows; i++) { + // Create CASE WHEN __row_id__ = i THEN value END for each row position + RexNode caseExpr = + context.relBuilder.call( + SqlStdOperatorTable.CASE, + context.relBuilder.equals( + context.relBuilder.field("__row_id__"), context.relBuilder.literal(i)), + context.relBuilder.field("value"), + context.relBuilder.literal(null)); + + AggCall maxCase = context.relBuilder.max(caseExpr).as("row " + i); + pivotAggCalls.add(maxCase); + } - // Create a single row showing the transpose concept - projections.add(context.rexBuilder.makeLiteral("column_names")); - newFieldNames.add("column"); + // Group by column and apply the conditional aggregations + context.relBuilder.aggregate( + context.relBuilder.groupKey(context.relBuilder.field(columnName)), pivotAggCalls); - // Add original field names as row data - for (int i = 0; i < Math.min(fieldNames.size(), 10); i++) { - if (i < fieldNames.size()) { - projections.add(context.rexBuilder.makeLiteral(fieldNames.get(i))); - } else { - projections.add( - context.rexBuilder.makeNullLiteral( - context.rexBuilder.getTypeFactory().createSqlType(SqlTypeName.VARCHAR))); - } - newFieldNames.add("row" + (i + 1)); + return context.relBuilder.peek(); } - // Create the final projection - context.relBuilder.project(projections, newFieldNames); - return context.relBuilder.peek(); - } - - @Override + @Override public RelNode visitBin(Bin node, CalcitePlanContext context) { visitChildren(node, context); diff --git a/docs/category.json b/docs/category.json index 094768d1e6f..620ebc90ade 100644 --- a/docs/category.json +++ b/docs/category.json @@ -43,6 +43,7 @@ "user/ppl/cmd/timechart.md", "user/ppl/cmd/top.md", "user/ppl/cmd/trendline.md", + "user/ppl/cmd/transpose.md", "user/ppl/cmd/where.md", "user/ppl/functions/aggregations.md", "user/ppl/functions/collection.md", diff --git a/docs/user/ppl/cmd/transpose.md b/docs/user/ppl/cmd/transpose.md new file mode 100644 index 00000000000..98015ef82aa --- /dev/null +++ b/docs/user/ppl/cmd/transpose.md @@ -0,0 +1,92 @@ +# transpose + +## Description + +The `transpose` command outputs the requested number of rows as columns, effectively transposing each result row into a corresponding column of field values. + +## Syntax + +transpose [int] [column_name=] + +* number-of-rows: optional. The number of rows to transform into columns. +* column_name: optional. The name of the first column to use when transposing rows. This column holds the field names. + + +## Example 1: Transpose results + +This example shows transposing wihtout any parameters. It transforms 5 rows into columns as default is 5. + +```ppl +source=accounts +| head 5 +| fields account_number, firstname, lastname, balance +| transpose +``` + +Expected output: + +```text +fetched rows / total rows = 4/4 ++----------------+-------+-------+-------+-------+-------+ +| column | row 1 | row 2 | row 3 | row 4 | row 5 | +|----------------|-------+-------+-------+-------+-------+ +| account_number | | | | | | +| firstname | | | | | | +| lastname | | | | | | +| balance | | | | | | ++----------------+-------+-------+-------+-------+-------+ +``` + +## Example 2: Tranpose results up to a provided number of rows. + +This example shows transposing wihtout any parameters. It transforms 4 rows into columns as default is 5. 
+ +```ppl +source=accounts +| head 5 +| fields account_number, firstname, lastname, balance +| transpose 4 +``` + +Expected output: + +```text +fetched rows / total rows = 4/4 ++----------------+-------+-------+-------+-------+ +| column | row 1 | row 2 | row 3 | row 4 | +|----------------|-------+-------+-------+-------+ +| account_number | | | | | +| firstname | | | | | +| lastname | | | | | +| balance | | | | | ++----------------+-------+-------+-------+-------+ +``` + +## Example 2: Tranpose results up to a provided number of rows. + +This example shows transposing wihtout any parameters. It transforms 4 rows into columns as default is 5. + +```ppl +source=accounts +| head 5 +| fields account_number, firstname, lastname, balance +| transpose 4 +``` + +Expected output: + +```text +fetched rows / total rows = 4/4 ++----------------+-------+-------+-------+-------+ +| column | row 1 | row 2 | row 3 | row 4 | +|----------------|-------+-------+-------+-------+ +| account_number | | | | | +| firstname | | | | | +| lastname | | | | | +| balance | | | | | ++----------------+-------+-------+-------+-------+ +``` + +## Limitations + +The `transpose` command transforms up to a number of rows specified and if not enough rows found, it shows those transposed rows as null columns. \ No newline at end of file diff --git a/ppl/src/main/antlr/OpenSearchPPLLexer.g4 b/ppl/src/main/antlr/OpenSearchPPLLexer.g4 index 69fb267db11..c16d15c30b8 100644 --- a/ppl/src/main/antlr/OpenSearchPPLLexer.g4 +++ b/ppl/src/main/antlr/OpenSearchPPLLexer.g4 @@ -71,6 +71,7 @@ LABEL: 'LABEL'; SHOW_NUMBERED_TOKEN: 'SHOW_NUMBERED_TOKEN'; AGGREGATION: 'AGGREGATION'; APPENDPIPE: 'APPENDPIPE'; +COLUMN_NAME: 'COLUMN_NAME'; //Native JOIN KEYWORDS JOIN: 'JOIN'; @@ -157,9 +158,6 @@ TIMEFIELD: 'TIMEFIELD'; INPUT: 'INPUT'; OUTPUT: 'OUTPUT'; PATH: 'PATH'; -COLUMN_NAME: 'COLUMN_NAME'; -HEADER_FIELD: 'HEADER_FIELD'; -INCLUDE_EMPTY: 'INCLUDE_EMPTY'; // COMPARISON FUNCTION KEYWORDS diff --git a/ppl/src/main/antlr/OpenSearchPPLParser.g4 b/ppl/src/main/antlr/OpenSearchPPLParser.g4 index c12ef9a4520..57f935846b7 100644 --- a/ppl/src/main/antlr/OpenSearchPPLParser.g4 +++ b/ppl/src/main/antlr/OpenSearchPPLParser.g4 @@ -334,10 +334,8 @@ transposeCommand ; transposeParameter - : number = integerLiteral - | COLUMN_NAME EQUAL stringLiteral - | HEADER_FIELD EQUAL stringLiteral - | INCLUDE_EMPTY EQUAL booleanLiteral + : (number = integerLiteral) + | (COLUMN_NAME EQUAL stringLiteral) ; @@ -1672,7 +1670,6 @@ searchableKeyWord | COL | TRANSPOSE | COLUMN_NAME - | HEADER_FIELD - | INCLUDE_EMPTY + | NUMBER ; diff --git a/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java b/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java index 27d91b3cd8d..6935e76b8ec 100644 --- a/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java +++ b/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java @@ -740,9 +740,8 @@ public UnresolvedPlan visitReverseCommand(OpenSearchPPLParser.ReverseCommandCont /** Transpose command. */ @Override public UnresolvedPlan visitTransposeCommand(OpenSearchPPLParser.TransposeCommandContext ctx) { - List arguments = ArgumentFactory.getArgumentList(ctx); - return new Transpose( arguments); - + java.util.Map arguments = ArgumentFactory.getArgumentList(ctx); + return new Transpose(arguments); } /** Chart command. 
*/
   @Override
   public UnresolvedPlan visitChartCommand(OpenSearchPPLParser.ChartCommandContext ctx) {
diff --git a/ppl/src/main/java/org/opensearch/sql/ppl/utils/ArgumentFactory.java b/ppl/src/main/java/org/opensearch/sql/ppl/utils/ArgumentFactory.java
index c5fb7cbf352..88ea5938783 100644
--- a/ppl/src/main/java/org/opensearch/sql/ppl/utils/ArgumentFactory.java
+++ b/ppl/src/main/java/org/opensearch/sql/ppl/utils/ArgumentFactory.java
@@ -308,39 +308,30 @@ public static List<Argument> getArgumentList(
     return arguments;
   }
 
-  public static List<Argument> getArgumentList(
-      OpenSearchPPLParser.TransposeCommandContext transposeCommandContext) {
-    List<Argument> arguments = new ArrayList<>();
-    for (OpenSearchPPLParser.TransposeParameterContext ctx : transposeCommandContext.transposeParameter()) {
-
-      if (ctx.COLUMN_NAME() != null) {
-        Literal columnName = getArgumentValue(ctx.stringLiteral());
-        arguments.add(new Argument("columnName", columnName));
-      } else if (ctx.HEADER_FIELD() != null) {
-        Literal headerField = getArgumentValue(ctx.stringLiteral());
-        arguments.add(new Argument("headerField", headerField));
-      } else if (ctx.INCLUDE_EMPTY() != null) {
-        Literal includeEmpty;
-        if (ctx.booleanLiteral() != null) {
-          includeEmpty = getArgumentValue(ctx.booleanLiteral());
-        } else {
-          throw new IllegalArgumentException("value for includeEmpty must be a boolean");
-        }
-        arguments.add(new Argument("includeEmpty", includeEmpty));
-      } else if (ctx.number != null) {
-        arguments.add(new Argument("number", getArgumentValue(ctx.number)));
-      } else {
-        throw new IllegalArgumentException(
-            String.format(
-                "A parameter of transpose must be a int limit, column_name, header_field, or"
-                    + " include_empty, got %s",
-                ctx));
-      }
-    }
-    return arguments;
-  }
+  public static Map<String, Argument> getArgumentList(
+      OpenSearchPPLParser.TransposeCommandContext transposeCommandContext) {
+    Map<String, Argument> arguments = new HashMap<>();
+    for (OpenSearchPPLParser.TransposeParameterContext ctx :
+        transposeCommandContext.transposeParameter()) {
+
+      if (ctx.COLUMN_NAME() != null) {
+        Literal columnName = getArgumentValue(ctx.stringLiteral());
+        arguments.put("columnName", new Argument("columnName", columnName));
+      } else if (ctx.number != null) {
+        arguments.put("number", new Argument("number", getArgumentValue(ctx.number)));
+      } else {
+        // Only an integer row limit and column_name remain in the transpose grammar.
+        throw new IllegalArgumentException(
+            String.format(
+                "A parameter of transpose must be an integer row limit or column_name,"
+                    + " got %s",
+                ctx));
+      }
+    }
+    return arguments;
+  }
 
-  /**
+  /**
   * Get list of {@link Argument}. 
* * @param ctx RareCommandContext instance From 1872a3d44db496b0c54a1ae14efb0001fa429a3e Mon Sep 17 00:00:00 2001 From: Asif Bashar Date: Sat, 27 Dec 2025 21:12:06 -0800 Subject: [PATCH 03/24] added argument type missing map and hashmap Signed-off-by: Asif Bashar --- .../main/java/org/opensearch/sql/ppl/utils/ArgumentFactory.java | 2 ++ 1 file changed, 2 insertions(+) diff --git a/ppl/src/main/java/org/opensearch/sql/ppl/utils/ArgumentFactory.java b/ppl/src/main/java/org/opensearch/sql/ppl/utils/ArgumentFactory.java index 88ea5938783..c27ac3a3504 100644 --- a/ppl/src/main/java/org/opensearch/sql/ppl/utils/ArgumentFactory.java +++ b/ppl/src/main/java/org/opensearch/sql/ppl/utils/ArgumentFactory.java @@ -10,6 +10,8 @@ import java.util.Collections; import java.util.List; import java.util.Optional; +import java.util.Map; +import java.util.HashMap; import org.antlr.v4.runtime.ParserRuleContext; import org.opensearch.sql.ast.dsl.AstDSL; import org.opensearch.sql.ast.expression.Argument; From d7975da95aee58fb0856711692ff0bb5d8ff5a4a Mon Sep 17 00:00:00 2001 From: Asif Bashar Date: Mon, 29 Dec 2025 13:46:52 -0800 Subject: [PATCH 04/24] added tests Signed-off-by: Asif Bashar --- .../opensearch/sql/ast/tree/Transpose.java | 24 +- .../sql/calcite/CalciteRelNodeVisitor.java | 267 +++++++++--------- docs/user/ppl/cmd/transpose.md | 52 ++-- docs/user/ppl/index.md | 91 +++--- .../sql/calcite/CalciteNoPushdownIT.java | 1 + .../sql/calcite/remote/CalciteExplainIT.java | 12 + .../sql/ppl/NewAddedCommandsIT.java | 11 + .../sql/security/CrossClusterSearchIT.java | 13 + .../opensearch/sql/ppl/parser/AstBuilder.java | 2 +- .../sql/ppl/utils/ArgumentFactory.java | 8 +- .../sql/ppl/utils/PPLQueryDataAnonymizer.java | 16 ++ .../ppl/utils/PPLQueryDataAnonymizerTest.java | 7 + 12 files changed, 278 insertions(+), 226 deletions(-) diff --git a/core/src/main/java/org/opensearch/sql/ast/tree/Transpose.java b/core/src/main/java/org/opensearch/sql/ast/tree/Transpose.java index 1a31eef24f4..2e403b6d3ed 100644 --- a/core/src/main/java/org/opensearch/sql/ast/tree/Transpose.java +++ b/core/src/main/java/org/opensearch/sql/ast/tree/Transpose.java @@ -18,23 +18,23 @@ @EqualsAndHashCode(callSuper = false) @RequiredArgsConstructor public class Transpose extends UnresolvedPlan { - private final java.util.Map arguments; + private final java.util.Map arguments; private UnresolvedPlan child; - public Integer getMaxRows(){ - Integer maxRows= 5; - if (arguments.containsKey("number")) { - maxRows = Integer.parseInt(arguments.get("number").getValue().toString()); - } - return maxRows; + public Integer getMaxRows() { + Integer maxRows = 5; + if (arguments.containsKey("number")) { + maxRows = Integer.parseInt(arguments.get("number").getValue().toString()); + } + return maxRows; } public String getColumnName() { - String columnName = "column"; - if (arguments.containsKey("columnName")) { - columnName = arguments.get("columnName").getValue().toString(); - } - return columnName; + String columnName = "column"; + if (arguments.containsKey("columnName")) { + columnName = arguments.get("columnName").getValue().toString(); + } + return columnName; } @Override diff --git a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java index 73fcdad4760..7f80eb38ce4 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java +++ b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java @@ -29,10 +29,6 @@ import 
com.google.common.collect.ImmutableList; import com.google.common.collect.Iterables; import com.google.common.collect.Streams; -import java.math.BigDecimal; -import java.sql.PreparedStatement; -import java.sql.ResultSet; -import java.sql.SQLException; import java.util.AbstractMap; import java.util.ArrayList; import java.util.Arrays; @@ -74,8 +70,6 @@ import org.apache.calcite.sql.type.SqlTypeUtil; import org.apache.calcite.tools.RelBuilder; import org.apache.calcite.tools.RelBuilder.AggCall; -import org.apache.calcite.tools.RelRunner; -import org.apache.calcite.tools.RelRunners; import org.apache.calcite.util.Holder; import org.apache.commons.lang3.ArrayUtils; import org.apache.commons.lang3.tuple.Pair; @@ -178,7 +172,7 @@ public class CalciteRelNodeVisitor extends AbstractNodeVisitor arguments = node.getArguments(); - Integer maxRows= node.getMaxRows(); + @Override + public RelNode visitTranspose( + org.opensearch.sql.ast.tree.Transpose node, CalcitePlanContext context) { + visitChildren(node, context); + java.util.Map arguments = node.getArguments(); + Integer maxRows = node.getMaxRows(); - String columnName = node.getColumnName(); + String columnName = node.getColumnName(); - // Get the current schema to transpose - RelNode currentNode = context.relBuilder.peek(); - List fieldNames = currentNode.getRowType().getFieldNames(); - List fields = currentNode.getRowType().getFieldList(); + // Get the current schema to transpose + RelNode currentNode = context.relBuilder.peek(); + List fieldNames = currentNode.getRowType().getFieldNames(); + List fields = currentNode.getRowType().getFieldList(); - if (fieldNames.isEmpty()) { - return currentNode; - } + if (fieldNames.isEmpty()) { + return currentNode; + } - // Step 1: Add row numbers to identify each row uniquely - RexNode rowNumber = - context - .relBuilder - .aggregateCall(SqlStdOperatorTable.ROW_NUMBER) - .over() - .rowsTo(RexWindowBounds.CURRENT_ROW) - .as("__row_id__"); - context.relBuilder.projectPlus(rowNumber); - - // Step 2: Unpivot the data - convert columns to rows - // Each field becomes a row with: row_id, column, value - List measureColumns = ImmutableList.of("value"); - List axisColumns = ImmutableList.of(columnName); - - // Create the unpivot value mappings - List, List>> valueMappings = new ArrayList<>(); - RelDataType varcharType = - context.rexBuilder.getTypeFactory().createSqlType(SqlTypeName.VARCHAR); - - for (String fieldName : fieldNames) { - if (fieldName.equals("__row_id__")) { - continue; // Skip the row number column - } + // Step 1: Add row numbers to identify each row uniquely + RexNode rowNumber = + context + .relBuilder + .aggregateCall(SqlStdOperatorTable.ROW_NUMBER) + .over() + .rowsTo(RexWindowBounds.CURRENT_ROW) + .as("__row_id__"); + context.relBuilder.projectPlus(rowNumber); - // Create the axis value (column name as literal) - RexLiteral columnNameLiteral = context.rexBuilder.makeLiteral(fieldName); - List axisValues = ImmutableList.of(columnNameLiteral); + // Step 2: Unpivot the data - convert columns to rows + // Each field becomes a row with: row_id, column, value + List measureColumns = ImmutableList.of("value"); + List axisColumns = ImmutableList.of(columnName); - // Create the measure value (field expression cast to VARCHAR) - RexNode fieldValue = context.relBuilder.field(fieldName); - RexNode castValue = context.rexBuilder.makeCast(varcharType, fieldValue, true); - List measureValues = ImmutableList.of(castValue); + // Create the unpivot value mappings + List, List>> valueMappings = new 
ArrayList<>(); + RelDataType varcharType = + context.rexBuilder.getTypeFactory().createSqlType(SqlTypeName.VARCHAR); - // Create the mapping entry - valueMappings.add(new AbstractMap.SimpleEntry<>(axisValues, measureValues)); - } + for (String fieldName : fieldNames) { + if (fieldName.equals("__row_id__")) { + continue; // Skip the row number column + } + + // Create the axis value (column name as literal) + RexLiteral columnNameLiteral = context.rexBuilder.makeLiteral(fieldName); + List axisValues = ImmutableList.of(columnNameLiteral); + + // Create the measure value (field expression cast to VARCHAR) + RexNode fieldValue = context.relBuilder.field(fieldName); + RexNode castValue = context.rexBuilder.makeCast(varcharType, fieldValue, true); + List measureValues = ImmutableList.of(castValue); + + // Create the mapping entry + valueMappings.add(new AbstractMap.SimpleEntry<>(axisValues, measureValues)); + } - // Apply the unpivot operation - context.relBuilder.unpivot( - false, // includeNulls = false - measureColumns, // measure column names: ["value"] - axisColumns, // axis column names: ["column"] - valueMappings // field mappings + // Apply the unpivot operation + context.relBuilder.unpivot( + false, // includeNulls = false + measureColumns, // measure column names: ["value"] + axisColumns, // axis column names: ["column"] + valueMappings // field mappings ); - // Step 3: Pivot the data to transpose rows as columns - // Pivot on __row_id__ with column as the grouping key - // This creates: column, row1, row2, row3, ... - - // Get unique row IDs to create column names - RelNode unpivotedData = context.relBuilder.build(); - - // Create aggregation calls for each row - we'll use MAX since we know each cell has only one - // value - List pivotColumns = new ArrayList<>(); - List pivotValues = new ArrayList<>(); - - // We need to determine how many rows we have to create the proper pivot structure - // For now, we'll use a different approach - use conditional aggregation - - // Get the unpivoted data back on stack - context.relBuilder.push(unpivotedData); - - // Create conditional aggregations for each row position - // We'll use ROW_NUMBER to determine the row positions dynamically - RexNode rowPos = - context - .relBuilder - .aggregateCall(SqlStdOperatorTable.ROW_NUMBER) - .over() - .partitionBy(context.relBuilder.field(columnName)) - .orderBy(context.relBuilder.field("__row_id__")) - .rowsTo(RexWindowBounds.CURRENT_ROW) - .as("__row_pos__"); - context.relBuilder.projectPlus(rowPos); - - // Now we'll use a different strategy - collect all values per column and then split them - // Group by column and collect all values in order - List groupByKeys = ImmutableList.of(context.relBuilder.field(columnName)); - - // For simplicity, we'll use a direct approach with conditional aggregations - // instead of STRING_AGG which may not be available - - // For simplicity, let's use a manual approach that works with the available Calcite operations - // We'll create a query that manually builds the transposed result - - // Reset and try a simpler approach - context.relBuilder.push(unpivotedData); - - // Let's manually build the pivot by creating conditional aggregations - // First, get distinct row IDs - context.relBuilder.aggregate( - context.relBuilder.groupKey(), - context.relBuilder.max(context.relBuilder.field("__row_id__")).as("max_row_id")); - - - - // Go back to unpivoted data and create the pivot manually - context.relBuilder.clear(); - context.relBuilder.push(unpivotedData); - - // Create 
aggregation calls for each possible row position - List pivotAggCalls = new ArrayList<>(); - - for (int i = 1; i <= maxRows; i++) { - // Create CASE WHEN __row_id__ = i THEN value END for each row position - RexNode caseExpr = - context.relBuilder.call( - SqlStdOperatorTable.CASE, - context.relBuilder.equals( - context.relBuilder.field("__row_id__"), context.relBuilder.literal(i)), - context.relBuilder.field("value"), - context.relBuilder.literal(null)); - - AggCall maxCase = context.relBuilder.max(caseExpr).as("row " + i); - pivotAggCalls.add(maxCase); - } + // Step 3: Pivot the data to transpose rows as columns + // Pivot on __row_id__ with column as the grouping key + // This creates: column, row1, row2, row3, ... - // Group by column and apply the conditional aggregations - context.relBuilder.aggregate( - context.relBuilder.groupKey(context.relBuilder.field(columnName)), pivotAggCalls); + // Get unique row IDs to create column names + RelNode unpivotedData = context.relBuilder.build(); - return context.relBuilder.peek(); + // Create aggregation calls for each row - we'll use MAX since we know each cell has only one + // value + List pivotColumns = new ArrayList<>(); + List pivotValues = new ArrayList<>(); + + // We need to determine how many rows we have to create the proper pivot structure + // For now, we'll use a different approach - use conditional aggregation + + // Get the unpivoted data back on stack + context.relBuilder.push(unpivotedData); + + // Create conditional aggregations for each row position + // We'll use ROW_NUMBER to determine the row positions dynamically + RexNode rowPos = + context + .relBuilder + .aggregateCall(SqlStdOperatorTable.ROW_NUMBER) + .over() + .partitionBy(context.relBuilder.field(columnName)) + .orderBy(context.relBuilder.field("__row_id__")) + .rowsTo(RexWindowBounds.CURRENT_ROW) + .as("__row_pos__"); + context.relBuilder.projectPlus(rowPos); + + // Now we'll use a different strategy - collect all values per column and then split them + // Group by column and collect all values in order + List groupByKeys = ImmutableList.of(context.relBuilder.field(columnName)); + + // For simplicity, we'll use a direct approach with conditional aggregations + // instead of STRING_AGG which may not be available + + // For simplicity, let's use a manual approach that works with the available Calcite operations + // We'll create a query that manually builds the transposed result + + // Reset and try a simpler approach + context.relBuilder.push(unpivotedData); + + // Let's manually build the pivot by creating conditional aggregations + // First, get distinct row IDs + context.relBuilder.aggregate( + context.relBuilder.groupKey(), + context.relBuilder.max(context.relBuilder.field("__row_id__")).as("max_row_id")); + + // Go back to unpivoted data and create the pivot manually + context.relBuilder.clear(); + context.relBuilder.push(unpivotedData); + + // Create aggregation calls for each possible row position + List pivotAggCalls = new ArrayList<>(); + + for (int i = 1; i <= maxRows; i++) { + // Create CASE WHEN __row_id__ = i THEN value END for each row position + RexNode caseExpr = + context.relBuilder.call( + SqlStdOperatorTable.CASE, + context.relBuilder.equals( + context.relBuilder.field("__row_id__"), context.relBuilder.literal(i)), + context.relBuilder.field("value"), + context.relBuilder.literal(null)); + + AggCall maxCase = context.relBuilder.max(caseExpr).as("row " + i); + pivotAggCalls.add(maxCase); } + // Group by column and apply the conditional aggregations 
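+    // For example, with columnName = "column" and maxRows = 2, the aggregate below is
+    // equivalent to:
+    //   SELECT column,
+    //          MAX(CASE WHEN __row_id__ = 1 THEN value END) AS `row 1`,
+    //          MAX(CASE WHEN __row_id__ = 2 THEN value END) AS `row 2`
+    //   FROM <unpivoted input>
+    //   GROUP BY column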
+    context.relBuilder.aggregate(
+        context.relBuilder.groupKey(context.relBuilder.field(columnName)), pivotAggCalls);
 
-      return context.relBuilder.peek();
-    }
+    return context.relBuilder.peek();
+  }
 
-    @Override
+  @Override
   public RelNode visitBin(Bin node, CalcitePlanContext context) {
     visitChildren(node, context);
 
diff --git a/docs/user/ppl/cmd/transpose.md b/docs/user/ppl/cmd/transpose.md
index 98015ef82aa..7d0e6a2a95f 100644
--- a/docs/user/ppl/cmd/transpose.md
+++ b/docs/user/ppl/cmd/transpose.md
@@ -27,14 +27,14 @@ Expected output:
 
 ```text
 fetched rows / total rows = 4/4
-+----------------+-------+-------+-------+-------+-------+
-| column | row 1 | row 2 | row 3 | row 4 | row 5 |
-|----------------|-------+-------+-------+-------+-------+
-| account_number | | | | | |
-| firstname | | | | | |
-| lastname | | | | | |
-| balance | | | | | |
-+----------------+-------+-------+-------+-------+-------+
++----------------+-------+--------+---------+-------+-------+
+| column         | row 1 | row 2  | row 3   | row 4 | row 5 |
+|----------------+-------+--------+---------+-------+-------|
+| account_number | 1     | 6      | 13      | 18    | null  |
+| firstname      | Amber | Hattie | Nanette | Dale  | null  |
+| balance        | 39225 | 5686   | 32838   | 4180  | null  |
+| lastname       | Duke  | Bond   | Bates   | Adams | null  |
++----------------+-------+--------+---------+-------+-------+
 ```
 
-## Example 2: Tranpose results up to a provided number of rows.
+## Example 2: Transpose results up to a provided number of rows.
 
-This example shows transposing wihtout any parameters. It transforms 4 rows into columns as default is 5.
+This example passes an explicit row count of 4, so only the first 4 rows are transposed into columns.
 
@@ -52,17 +52,17 @@ Expected output:
 
 ```text
 fetched rows / total rows = 4/4
-+----------------+-------+-------+-------+-------+
-| column | row 1 | row 2 | row 3 | row 4 |
-|----------------|-------+-------+-------+-------+
-| account_number | | | | |
-| firstname | | | | |
-| lastname | | | | |
-| balance | | | | |
-+----------------+-------+-------+-------+-------+
++----------------+-------+--------+---------+-------+
+| column         | row 1 | row 2  | row 3   | row 4 |
+|----------------+-------+--------+---------+-------|
+| account_number | 1     | 6      | 13      | 18    |
+| firstname      | Amber | Hattie | Nanette | Dale  |
+| balance        | 39225 | 5686   | 32838   | 4180  |
+| lastname       | Duke  | Bond   | Bates   | Adams |
++----------------+-------+--------+---------+-------+
 ```
 
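+Since `transpose` never emits more than the requested number of `row N` columns, a
+matching `head` keeps rows and columns aligned. A minimal sketch against the same
+`accounts` index (output shape as above, with three rows):
+
+```ppl
+source=accounts
+| head 3
+| fields firstname, balance
+| transpose 3
+```
+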
-## Example 2: Tranpose results up to a provided number of rows.
+## Example 3: Transpose results with a specified first-column name.
 
-This example shows transposing wihtout any parameters. It transforms 4 rows into columns as default is 5.
+This example transposes the first 4 rows and uses the `column_name` parameter to rename the first output column to `column_names`.
 
 ```ppl
 source=accounts
 | head 5
 | fields account_number, firstname, lastname, balance
-| transpose 4
+| transpose 4 column_name='column_names'
 ```
 
 Expected output:
 
 ```text
 fetched rows / total rows = 4/4
-+----------------+-------+-------+-------+-------+
-| column | row 1 | row 2 | row 3 | row 4 |
-|----------------|-------+-------+-------+-------+
-| account_number | | | | |
-| firstname | | | | |
-| lastname | | | | |
-| balance | | | | |
-+----------------+-------+-------+-------+-------+
++----------------+-------+--------+---------+-------+
+| column_names   | row 1 | row 2  | row 3   | row 4 |
+|----------------+-------+--------+---------+-------|
+| account_number | 1     | 6      | 13      | 18    |
+| firstname      | Amber | Hattie | Nanette | Dale  |
+| balance        | 39225 | 5686   | 32838   | 4180  |
+| lastname       | Duke  | Bond   | Bates   | Adams |
++----------------+-------+--------+---------+-------+
 ```
 
 ## Limitations
 
-The `transpose` command transforms up to a number of rows specified and if not enough rows found, it shows those transposed rows as null columns.
+The `transpose` command transposes at most the specified number of rows; when fewer rows are available, the remaining `row N` columns are filled with null values.
diff --git a/docs/user/ppl/index.md b/docs/user/ppl/index.md
index 30ad7159182..42a9c8237a9 100644
--- a/docs/user/ppl/index.md
+++ b/docs/user/ppl/index.md
@@ -35,51 +35,52 @@ source=accounts
 The following commands are available in PPL:
 
 **Note:** Experimental commands are ready for use, but specific parameters may change based on feedback.
 
-| Command Name | Version Introduced | Current Status | Command Description |
-| --- | --- | --- | --- |
-| [search command](cmd/search.md) | 1.0 | stable (since 1.0) | Retrieve documents from the index. |
-| [where command](cmd/where.md) | 1.0 | stable (since 1.0) | Filter the search result using boolean expressions. |
-| [subquery command](cmd/subquery.md) | 3.0 | experimental (since 3.0) | Embed one PPL query inside another for complex filtering and data retrieval operations. |
-| [fields command](cmd/fields.md) | 1.0 | stable (since 1.0) | Keep or remove fields from the search result. |
-| [rename command](cmd/rename.md) | 1.0 | stable (since 1.0) | Rename one or more fields in the search result. |
-| [eval command](cmd/eval.md) | 1.0 | stable (since 1.0) | Evaluate an expression and append the result to the search result. |
-| [replace command](cmd/replace.md) | 3.4 | experimental (since 3.4) | Replace text in one or more fields in the search result |
-| [fillnull command](cmd/fillnull.md) | 3.0 | experimental (since 3.0) | Fill null with provided value in one or more fields in the search result. |
-| [expand command](cmd/expand.md) | 3.1 | experimental (since 3.1) | Transform a single document into multiple documents by expanding a nested array field. |
-| [flatten command](cmd/flatten.md) | 3.1 | experimental (since 3.1) | Flatten a struct or an object field into separate fields in a document. |
-| [table command](cmd/table.md) | 3.3 | experimental (since 3.3) | Keep or remove fields from the search result using enhanced syntax options. |
-| [stats command](cmd/stats.md) | 1.0 | stable (since 1.0) | Calculate aggregation from search results. |
-| [eventstats command](cmd/eventstats.md) | 3.1 | experimental (since 3.1) | Calculate aggregation statistics and add them as new fields to each event. |
-| [streamstats command](cmd/streamstats.md) | 3.4 | experimental (since 3.4) | Calculate cumulative or rolling statistics as events are processed in order. |
-| [bin command](cmd/bin.md) | 3.3 | experimental (since 3.3) | Group numeric values into buckets of equal intervals. |
-| [timechart command](cmd/timechart.md) | 3.3 | experimental (since 3.3) | Create time-based charts and visualizations. 
| -| [chart command](cmd/chart.md) | 3.4 | experimental (since 3.4) | Apply statistical aggregations to search results and group the data for visualizations. | -| [trendline command](cmd/trendline.md) | 3.0 | experimental (since 3.0) | Calculate moving averages of fields. | -| [sort command](cmd/sort.md) | 1.0 | stable (since 1.0) | Sort all the search results by the specified fields. | -| [reverse command](cmd/reverse.md) | 3.2 | experimental (since 3.2) | Reverse the display order of search results. | -| [head command](cmd/head.md) | 1.0 | stable (since 1.0) | Return the first N number of specified results after an optional offset in search order. | -| [dedup command](cmd/dedup.md) | 1.0 | stable (since 1.0) | Remove identical documents defined by the field from the search result. | -| [top command](cmd/top.md) | 1.0 | stable (since 1.0) | Find the most common tuple of values of all fields in the field list. | -| [rare command](cmd/rare.md) | 1.0 | stable (since 1.0) | Find the least common tuple of values of all fields in the field list. | -| [parse command](cmd/parse.md) | 1.3 | stable (since 1.3) | Parse a text field with a regular expression and append the result to the search result. | -| [grok command](cmd/grok.md) | 2.4 | stable (since 2.4) | Parse a text field with a grok pattern and append the results to the search result. | -| [rex command](cmd/rex.md) | 3.3 | experimental (since 3.3) | Extract fields from a raw text field using regular expression named capture groups. | -| [regex command](cmd/regex.md) | 3.3 | experimental (since 3.3) | Filter search results by matching field values against a regular expression pattern. | -| [spath command](cmd/spath.md) | 3.3 | experimental (since 3.3) | Extract fields from structured text data. | -| [patterns command](cmd/patterns.md) | 2.4 | stable (since 2.4) | Extract log patterns from a text field and append the results to the search result. | -| [join command](cmd/join.md) | 3.0 | stable (since 3.0) | Combine two datasets together. | -| [append command](cmd/append.md) | 3.3 | experimental (since 3.3) | Append the result of a sub-search to the bottom of the input search results. | -| [appendcol command](cmd/appendcol.md) | 3.1 | experimental (since 3.1) | Append the result of a sub-search and attach it alongside the input search results. | -| [lookup command](cmd/lookup.md) | 3.0 | experimental (since 3.0) | Add or replace data from a lookup index. | -| [multisearch command](cmd/multisearch.md) | 3.4 | experimental (since 3.4) | Execute multiple search queries and combine their results. | -| [ml command](cmd/ml.md) | 2.5 | stable (since 2.5) | Apply machine learning algorithms to analyze data. | -| [kmeans command](cmd/kmeans.md) | 1.3 | stable (since 1.3) | Apply the kmeans algorithm on the search result returned by a PPL command. | -| [ad command](cmd/ad.md) | 1.3 | deprecated (since 2.5) | Apply Random Cut Forest algorithm on the search result returned by a PPL command. | -| [describe command](cmd/describe.md) | 2.1 | stable (since 2.1) | Query the metadata of an index. | -| [explain command](cmd/explain.md) | 3.1 | stable (since 3.1) | Explain the plan of query. | -| [show datasources command](cmd/showdatasources.md) | 2.4 | stable (since 2.4) | Query datasources configured in the PPL engine. | - | [addtotals command](cmd/addtotals.md) | 3.4 | stable (since 3.4) | Adds row and column values and appends a totals column and row. 
| - | [addcoltotals command](cmd/addcoltotals.md) | 3.4 | stable (since 3.4) | Adds column values and appends a totals row. | +| Command Name | Version Introduced | Current Status | Command Description | +|----------------------------------------------------|--------------------|--------------------------|-------------------------------------------------------------------------| +| [search command](cmd/search.md) | 1.0 | stable (since 1.0) | Retrieve documents from the index. | +| [where command](cmd/where.md) | 1.0 | stable (since 1.0) | Filter the search result using boolean expressions. | +| [subquery command](cmd/subquery.md) | 3.0 | experimental (since 3.0) | Embed one PPL query inside another for complex filtering and data retrieval operations. | +| [fields command](cmd/fields.md) | 1.0 | stable (since 1.0) | Keep or remove fields from the search result. | +| [rename command](cmd/rename.md) | 1.0 | stable (since 1.0) | Rename one or more fields in the search result. | +| [eval command](cmd/eval.md) | 1.0 | stable (since 1.0) | Evaluate an expression and append the result to the search result. | +| [replace command](cmd/replace.md) | 3.4 | experimental (since 3.4) | Replace text in one or more fields in the search result | +| [fillnull command](cmd/fillnull.md) | 3.0 | experimental (since 3.0) | Fill null with provided value in one or more fields in the search result. | +| [expand command](cmd/expand.md) | 3.1 | experimental (since 3.1) | Transform a single document into multiple documents by expanding a nested array field. | +| [flatten command](cmd/flatten.md) | 3.1 | experimental (since 3.1) | Flatten a struct or an object field into separate fields in a document. | +| [table command](cmd/table.md) | 3.3 | experimental (since 3.3) | Keep or remove fields from the search result using enhanced syntax options. | +| [stats command](cmd/stats.md) | 1.0 | stable (since 1.0) | Calculate aggregation from search results. | +| [eventstats command](cmd/eventstats.md) | 3.1 | experimental (since 3.1) | Calculate aggregation statistics and add them as new fields to each event. | +| [streamstats command](cmd/streamstats.md) | 3.4 | experimental (since 3.4) | Calculate cumulative or rolling statistics as events are processed in order. | +| [bin command](cmd/bin.md) | 3.3 | experimental (since 3.3) | Group numeric values into buckets of equal intervals. | +| [timechart command](cmd/timechart.md) | 3.3 | experimental (since 3.3) | Create time-based charts and visualizations. | +| [chart command](cmd/chart.md) | 3.4 | experimental (since 3.4) | Apply statistical aggregations to search results and group the data for visualizations. | +| [trendline command](cmd/trendline.md) | 3.0 | experimental (since 3.0) | Calculate moving averages of fields. | +| [sort command](cmd/sort.md) | 1.0 | stable (since 1.0) | Sort all the search results by the specified fields. | +| [reverse command](cmd/reverse.md) | 3.2 | experimental (since 3.2) | Reverse the display order of search results. | +| [head command](cmd/head.md) | 1.0 | stable (since 1.0) | Return the first N number of specified results after an optional offset in search order. | +| [dedup command](cmd/dedup.md) | 1.0 | stable (since 1.0) | Remove identical documents defined by the field from the search result. | +| [top command](cmd/top.md) | 1.0 | stable (since 1.0) | Find the most common tuple of values of all fields in the field list. 
| +| [rare command](cmd/rare.md) | 1.0 | stable (since 1.0) | Find the least common tuple of values of all fields in the field list. | +| [parse command](cmd/parse.md) | 1.3 | stable (since 1.3) | Parse a text field with a regular expression and append the result to the search result. | +| [grok command](cmd/grok.md) | 2.4 | stable (since 2.4) | Parse a text field with a grok pattern and append the results to the search result. | +| [rex command](cmd/rex.md) | 3.3 | experimental (since 3.3) | Extract fields from a raw text field using regular expression named capture groups. | +| [regex command](cmd/regex.md) | 3.3 | experimental (since 3.3) | Filter search results by matching field values against a regular expression pattern. | +| [spath command](cmd/spath.md) | 3.3 | experimental (since 3.3) | Extract fields from structured text data. | +| [patterns command](cmd/patterns.md) | 2.4 | stable (since 2.4) | Extract log patterns from a text field and append the results to the search result. | +| [join command](cmd/join.md) | 3.0 | stable (since 3.0) | Combine two datasets together. | +| [append command](cmd/append.md) | 3.3 | experimental (since 3.3) | Append the result of a sub-search to the bottom of the input search results. | +| [appendcol command](cmd/appendcol.md) | 3.1 | experimental (since 3.1) | Append the result of a sub-search and attach it alongside the input search results. | +| [lookup command](cmd/lookup.md) | 3.0 | experimental (since 3.0) | Add or replace data from a lookup index. | +| [multisearch command](cmd/multisearch.md) | 3.4 | experimental (since 3.4) | Execute multiple search queries and combine their results. | +| [ml command](cmd/ml.md) | 2.5 | stable (since 2.5) | Apply machine learning algorithms to analyze data. | +| [kmeans command](cmd/kmeans.md) | 1.3 | stable (since 1.3) | Apply the kmeans algorithm on the search result returned by a PPL command. | +| [ad command](cmd/ad.md) | 1.3 | deprecated (since 2.5) | Apply Random Cut Forest algorithm on the search result returned by a PPL command. | +| [describe command](cmd/describe.md) | 2.1 | stable (since 2.1) | Query the metadata of an index. | +| [explain command](cmd/explain.md) | 3.1 | stable (since 3.1) | Explain the plan of query. | +| [show datasources command](cmd/showdatasources.md) | 2.4 | stable (since 2.4) | Query datasources configured in the PPL engine. | +| [addtotals command](cmd/addtotals.md) | 3.5 | stable (since 3.5) | Adds row and column values and appends a totals column and row. | +| [addcoltotals command](cmd/addcoltotals.md) | 3.5 | stable (since 3.5) | Adds column values and appends a totals row. 
| +| [transpose command](cmd/transpose.md) | 3.5 | stable (since 3.5) | Transpose rows to columns.| - [Syntax](cmd/syntax.md) - PPL query structure and command syntax formatting * **Functions** diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/CalciteNoPushdownIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/CalciteNoPushdownIT.java index c254fb47c44..2babda91636 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/CalciteNoPushdownIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/CalciteNoPushdownIT.java @@ -103,6 +103,7 @@ CalciteTextFunctionIT.class, CalciteTopCommandIT.class, CalciteTrendlineCommandIT.class, + CalciteTransposeCommandIT.class, CalciteVisualizationFormatIT.class, CalciteWhereCommandIT.class, CalcitePPLTpchIT.class diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java index d5e60d491bd..4709439cd99 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java @@ -2021,6 +2021,18 @@ public void testaddColTotalsExplain() throws IOException { + "| addcoltotals balance age label='GrandTotal'")); } + @Test + public void testTransposeExplain() throws IOException { + enabledOnlyWhenPushdownIsEnabled(); + String expected = loadExpectedPlan("explain_transpose.yaml"); + assertYamlEqualsIgnoreId( + expected, + explainQueryYaml( + "source=opensearch-sql_test_index_account" + + "| head 5 " + + "| transpose 4 column_name='column_names'")); + } + public void testComplexDedup() throws IOException { enabledOnlyWhenPushdownIsEnabled(); String expected = loadExpectedPlan("explain_dedup_complex1.yaml"); diff --git a/integ-test/src/test/java/org/opensearch/sql/ppl/NewAddedCommandsIT.java b/integ-test/src/test/java/org/opensearch/sql/ppl/NewAddedCommandsIT.java index 15f3c508b14..9783c8ae169 100644 --- a/integ-test/src/test/java/org/opensearch/sql/ppl/NewAddedCommandsIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/ppl/NewAddedCommandsIT.java @@ -202,6 +202,17 @@ public void testAddColTotalCommand() throws IOException { } } + @Test + public void testTransposeCommand() throws IOException { + JSONObject result; + try { + executeQuery(String.format("search source=%s | transpose ", TEST_INDEX_BANK)); + } catch (ResponseException e) { + result = new JSONObject(TestUtils.getResponseBody(e.getResponse())); + verifyQuery(result); + } + } + private void verifyQuery(JSONObject result) throws IOException { if (isCalciteEnabled()) { assertFalse(result.getJSONArray("datarows").isEmpty()); diff --git a/integ-test/src/test/java/org/opensearch/sql/security/CrossClusterSearchIT.java b/integ-test/src/test/java/org/opensearch/sql/security/CrossClusterSearchIT.java index 7ee90dc4640..817beea84fe 100644 --- a/integ-test/src/test/java/org/opensearch/sql/security/CrossClusterSearchIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/security/CrossClusterSearchIT.java @@ -272,6 +272,19 @@ public void testCrossClusterAddColTotals() throws IOException { result, rows("Hattie", 36, 5686), rows("Nanette", 28, 32838), rows(null, 64, 38524)); } + @Test + public void testCrossClusterTranspose() throws IOException { + // Test query_string without fields parameter on remote cluster + JSONObject result = + executeQuery( + String.format( + "search source=%s | where firstname='Hattie' or firstname ='Nanette'|fields" + + " 
firstname,age,balance | transpose 3 column_name='column_names'", + TEST_INDEX_BANK_REMOTE)); + verifyDataRows( + result, rows("Hattie", 36, 5686), rows("Nanette", 28, 32838), rows(null, 64, 38524)); + } + @Test public void testCrossClusterAppend() throws IOException { // TODO: We should enable calcite by default in CrossClusterSearchIT? diff --git a/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java b/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java index 6935e76b8ec..0c93998f768 100644 --- a/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java +++ b/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java @@ -740,7 +740,7 @@ public UnresolvedPlan visitReverseCommand(OpenSearchPPLParser.ReverseCommandCont /** Transpose command. */ @Override public UnresolvedPlan visitTransposeCommand(OpenSearchPPLParser.TransposeCommandContext ctx) { - java.util.Map arguments = ArgumentFactory.getArgumentList(ctx); + java.util.Map arguments = ArgumentFactory.getArgumentList(ctx); return new Transpose(arguments); } diff --git a/ppl/src/main/java/org/opensearch/sql/ppl/utils/ArgumentFactory.java b/ppl/src/main/java/org/opensearch/sql/ppl/utils/ArgumentFactory.java index c27ac3a3504..b4936f46816 100644 --- a/ppl/src/main/java/org/opensearch/sql/ppl/utils/ArgumentFactory.java +++ b/ppl/src/main/java/org/opensearch/sql/ppl/utils/ArgumentFactory.java @@ -8,10 +8,10 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; +import java.util.HashMap; import java.util.List; -import java.util.Optional; import java.util.Map; -import java.util.HashMap; +import java.util.Optional; import org.antlr.v4.runtime.ParserRuleContext; import org.opensearch.sql.ast.dsl.AstDSL; import org.opensearch.sql.ast.expression.Argument; @@ -312,14 +312,14 @@ public static List getArgumentList( public static Map getArgumentList( OpenSearchPPLParser.TransposeCommandContext transposeCommandContext) { - Map arguments = new HashMap<>(); + Map arguments = new HashMap<>(); for (OpenSearchPPLParser.TransposeParameterContext ctx : transposeCommandContext.transposeParameter()) { if (ctx.COLUMN_NAME() != null) { Literal columnName = getArgumentValue(ctx.stringLiteral()); arguments.put("columnName", new Argument("columnName", columnName)); - } else if (ctx.number != null) { + } else if (ctx.number != null) { arguments.put("number", new Argument("number", getArgumentValue(ctx.number))); } else { diff --git a/ppl/src/main/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizer.java b/ppl/src/main/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizer.java index 7d04ad8e6ad..98cbac2ca3d 100644 --- a/ppl/src/main/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizer.java +++ b/ppl/src/main/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizer.java @@ -100,6 +100,7 @@ import org.opensearch.sql.ast.tree.StreamWindow; import org.opensearch.sql.ast.tree.SubqueryAlias; import org.opensearch.sql.ast.tree.TableFunction; +import org.opensearch.sql.ast.tree.Transpose; import org.opensearch.sql.ast.tree.Trendline; import org.opensearch.sql.ast.tree.UnresolvedPlan; import org.opensearch.sql.ast.tree.Values; @@ -636,6 +637,21 @@ public String visitTrendline(Trendline node, String context) { return StringUtils.format("%s | trendline %s", child, computations); } + @Override + public String visitTranspose(Transpose node, String context) { + String child = node.getChild().get(0).accept(this, context); + StringBuilder anonymized = new StringBuilder(StringUtils.format("%s | transpose", 
child));
+    java.util.Map<String, Argument> arguments = node.getArguments();
+
+    if (arguments.containsKey("number")) {
+      anonymized.append(StringUtils.format(" %s", arguments.get("number").getValue()));
+    }
+    if (arguments.containsKey("columnName")) {
+      anonymized.append(StringUtils.format(" %s=***", "column_name"));
+    }
+    return anonymized.toString();
+  }
+
   @Override
   public String visitAppendCol(AppendCol node, String context) {
     String child = node.getChild().get(0).accept(this, context);
diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizerTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizerTest.java
index 2fd08988f6b..e80b12caba8 100644
--- a/ppl/src/test/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizerTest.java
+++ b/ppl/src/test/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizerTest.java
@@ -247,6 +247,13 @@ public void testDedupCommand() {
         anonymize("source=t | dedup f1, f2"));
   }
 
+  @Test
+  public void testTransposeCommand() {
+    assertEquals(
+        "source=table | transpose 5 column_name=***",
+        anonymize("source=t | transpose 5 column_name='column_names'"));
+  }
+
   @Test
   public void testTrendlineCommand() {
     assertEquals(
From 648914b8b0741e63b1c4a5635e89096e2b2c6581 Mon Sep 17 00:00:00 2001
From: Asif Bashar
Date: Mon, 29 Dec 2025 16:19:39 -0800
Subject: [PATCH 05/24] added tests

Signed-off-by: Asif Bashar
---
 .../remote/CalciteTransposeCommandIT.java     | 199 ++++++++++++++++
 .../ppl/calcite/CalcitePPLTransposeTest.java  | 216 ++++++++++++++++++
 2 files changed, 415 insertions(+)
 create mode 100644 integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteTransposeCommandIT.java
 create mode 100644 ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLTransposeTest.java

diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteTransposeCommandIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteTransposeCommandIT.java
new file mode 100644
index 00000000000..1b4f8cf03d0
--- /dev/null
+++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteTransposeCommandIT.java
@@ -0,0 +1,199 @@
+/*
+ * Copyright OpenSearch Contributors
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+package org.opensearch.sql.calcite.remote;
+
+import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_ACCOUNT;
+import static org.opensearch.sql.util.MatcherUtils.rows;
+import static org.opensearch.sql.util.MatcherUtils.schema;
+import static org.opensearch.sql.util.MatcherUtils.verifyDataRows;
+import static org.opensearch.sql.util.MatcherUtils.verifySchema;
+
+import java.io.IOException;
+import org.junit.Test;
+import org.opensearch.sql.ppl.PPLIntegTestCase;
+
+public class CalciteTransposeCommandIT extends PPLIntegTestCase {
+
+  @Override
+  public void init() throws Exception {
+    super.init();
+    enableCalcite();
+    loadIndex(Index.ACCOUNT);
+    loadIndex(Index.BANK);
+  }
+
+  /**
+   * Default transpose test without parameters, run against the account index.
+   *
+   * @throws IOException on request failure
+   */
+  @Test
+  public void testTranspose() throws IOException {
+    var result =
+        executeQuery(
+            String.format(
+                "source=%s | head 5 | fields firstname, age, balance | transpose",
+                TEST_INDEX_ACCOUNT));
+
+    // Verify the transposed schema: the column-name field plus one "row N" field per input row
+    verifySchema(
+        result,
+        schema("column", "string"),
+        schema("row 1", "string"),
+        schema("row 2", "string"),
+        schema("row 3", "string"),
+        schema("row 4", "string"),
+        schema("row 5", "string"));
+
+    // Each selected field becomes one data row in the transposed result
+    verifyDataRows(
+        result,
+        rows("firstname", "Amber", "Hattie", "Nanette", "Dale", "Elinor"),
+        rows("balance ", "39225", "5686", "32838", "4180", "16418"),
+        rows("age ", "32", "36", "28", "33", "36"));
+  }
+
+  @Test
+  public void testTransposeLimit() throws IOException {
+    var result =
+        executeQuery(
+            String.format(
+                "source=%s | head 10 | fields firstname, age, balance | transpose 14",
+                TEST_INDEX_ACCOUNT));
+
+    // All 14 requested row columns appear even though only 10 input rows exist
+    verifySchema(
+        result,
+        schema("column", "string"),
+        schema("row 1", "string"),
+        schema("row 2", "string"),
+        schema("row 3", "string"),
+        schema("row 4", "string"),
+        schema("row 5", "string"),
+        schema("row 6", "string"),
+        schema("row 7", "string"),
+        schema("row 8", "string"),
+        schema("row 9", "string"),
+        schema("row 10", "string"),
+        schema("row 11", "string"),
+        schema("row 12", "string"),
+        schema("row 13", "string"),
+        schema("row 14", "string"));
+
+    // Row positions beyond the available input rows are null-filled
+    verifyDataRows(
+        result,
+        rows(
+            "firstname",
+            "Amber",
+            "Hattie",
+            "Nanette",
+            "Dale",
+            "Elinor",
+            "Virginia",
+            "Dillard",
+            "Mcgee",
+            "Aurelia",
+            "Fulton",
+            null,
+            null,
+            null,
+            null),
+        rows(
+            "balance ",
+            "39225",
+            "5686",
+            "32838",
+            "4180",
+            "16418",
+            "40540",
+            "48086",
+            "18612",
+            "34487",
+            "29104",
+            null,
+            null,
+            null,
+            null),
+        rows(
+            "age ",
+            "32",
+            "36",
+            "28",
+            "33",
+            "36",
+            "39",
+            "34",
+            "39",
+            "37",
+            "23",
+            null,
+            null,
+            null,
+            null));
+  }
+
+  @Test
+  public void testTransposeLowerLimit() throws IOException {
+    var result =
+        executeQuery(
+            String.format(
+                "source=%s | head 15 | fields firstname, age, balance | transpose 5",
+                TEST_INDEX_ACCOUNT));
+
+    // Only the first 5 of the 15 input rows are kept when the limit is lower
+    verifySchema(
+        result,
+        schema("column", "string"),
+        schema("row 1", "string"),
+        schema("row 2", "string"),
+        schema("row 3", "string"),
+        schema("row 4", "string"),
+        schema("row 5", "string"));
+
+    verifyDataRows(
+        result,
+        rows("firstname", "Amber", "Hattie", "Nanette", "Dale", "Elinor"),
+        rows("balance ", "39225", "5686", "32838", "4180", "16418"),
+        rows("age ", "32", "36", "28", "33", "36"));
+  }
+
+  @Test
+  public void testTransposeColumnName() throws IOException {
+    var result =
+        executeQuery(
+            String.format(
+                "source=%s | head 5 | fields firstname, age, balance | transpose 5"
+                    + " column_name='column_names'",
+                TEST_INDEX_ACCOUNT));
+
+    // The column_name option renames the first output field
+    verifySchema(
+        result,
+        schema("column_names", "string"),
+        schema("row 1", "string"),
+        schema("row 2", "string"),
+        schema("row 3", "string"),
+        schema("row 4", "string"),
+        schema("row 5", "string"));
+
+    verifyDataRows(
+        result,
+        rows("firstname", "Amber", "Hattie", "Nanette", "Dale", "Elinor"),
+        rows("balance ", "39225", "5686", "32838", "4180", "16418"),
+        rows("age ", "32", "36", "28", "33", "36"));
+  }
+}
diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLTransposeTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLTransposeTest.java
new file mode 100644
index 00000000000..b12721d9578
--- /dev/null
+++ b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLTransposeTest.java
@@ -0,0 +1,216 @@
+/*
+ * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.ppl.calcite; + +import org.apache.calcite.rel.RelNode; +import org.apache.calcite.test.CalciteAssert; +import org.junit.Test; + +public class CalcitePPLTransposeTest extends CalcitePPLAbstractTest { + + public CalcitePPLTransposeTest() { + super(CalciteAssert.SchemaSpec.SCOTT_WITH_TEMPORAL); + } + + @Test + public void testSimpleCountWithTranspose() { + String ppl = "source=EMP | stats count() as c|transpose"; + RelNode root = getRelNode(ppl); + String expectedLogical = + "LogicalAggregate(group=[{0}], row 1=[MAX($1)], row 2=[MAX($2)], row 3=[MAX($3)], row" + + " 4=[MAX($4)], row 5=[MAX($5)])\n" + + " LogicalProject(column=[$2], $f4=[CASE(=($1, 1), CAST($3):VARCHAR NOT NULL," + + " null:NULL)], $f5=[CASE(=($1, 2), CAST($3):VARCHAR NOT NULL, null:NULL)]," + + " $f6=[CASE(=($1, 3), CAST($3):VARCHAR NOT NULL, null:NULL)], $f7=[CASE(=($1, 4)," + + " CAST($3):VARCHAR NOT NULL, null:NULL)], $f8=[CASE(=($1, 5), CAST($3):VARCHAR NOT" + + " NULL, null:NULL)])\n" + + " LogicalFilter(condition=[IS NOT NULL($3)])\n" + + " LogicalProject(c=[$0], __row_id__=[$1], column=[$2], value=[CASE(=($2, 'c')," + + " CAST($0):VARCHAR NOT NULL, null:NULL)])\n" + + " LogicalJoin(condition=[true], joinType=[inner])\n" + + " LogicalProject(c=[$0], __row_id__=[ROW_NUMBER() OVER ()])\n" + + " LogicalAggregate(group=[{}], c=[COUNT()])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n" + + " LogicalValues(tuples=[[{ 'c' }]])\n"; + verifyLogical(root, expectedLogical); + String expectedResult = "column=c; row 1=14; row 2=null; row 3=null; row 4=null; row 5=null\n"; + verifyResult(root, expectedResult); + + String expectedSparkSql = + "SELECT `column`, MAX(CASE WHEN `__row_id__` = 1 THEN CAST(`value` AS STRING) ELSE NULL" + + " END) `row 1`, MAX(CASE WHEN `__row_id__` = 2 THEN CAST(`value` AS STRING) ELSE NULL" + + " END) `row 2`, MAX(CASE WHEN `__row_id__` = 3 THEN CAST(`value` AS STRING) ELSE NULL" + + " END) `row 3`, MAX(CASE WHEN `__row_id__` = 4 THEN CAST(`value` AS STRING) ELSE NULL" + + " END) `row 4`, MAX(CASE WHEN `__row_id__` = 5 THEN CAST(`value` AS STRING) ELSE NULL" + + " END) `row 5`\n" + + "FROM (SELECT `t0`.`c`, `t0`.`__row_id__`, `t1`.`column`, CASE WHEN `t1`.`column` =" + + " 'c' THEN CAST(`t0`.`c` AS STRING) ELSE NULL END `value`\n" + + "FROM (SELECT COUNT(*) `c`, ROW_NUMBER() OVER () `__row_id__`\n" + + "FROM `scott`.`EMP`) `t0`\n" + + "CROSS JOIN (VALUES ('c')) `t1` (`column`)) `t2`\n" + + "WHERE `t2`.`value` IS NOT NULL\n" + + "GROUP BY `column`"; + verifyPPLToSparkSQL(root, expectedSparkSql); + } + + @Test + public void testMultipleAggregatesWithAliasesTranspose() { + String ppl = + "source=EMP | stats avg(SAL) as avg_sal, max(SAL) as max_sal, min(SAL) as min_sal, count()" + + " as cnt|transpose "; + RelNode root = getRelNode(ppl); + String expectedLogical = + "LogicalAggregate(group=[{0}], row 1=[MAX($1)], row 2=[MAX($2)], row 3=[MAX($3)], row" + + " 4=[MAX($4)], row 5=[MAX($5)])\n" + + " LogicalProject(column=[$5], $f7=[CASE(=($4, 1), CAST($6):VARCHAR NOT NULL," + + " null:NULL)], $f8=[CASE(=($4, 2), CAST($6):VARCHAR NOT NULL, null:NULL)]," + + " $f9=[CASE(=($4, 3), CAST($6):VARCHAR NOT NULL, null:NULL)], $f10=[CASE(=($4, 4)," + + " CAST($6):VARCHAR NOT NULL, null:NULL)], $f11=[CASE(=($4, 5), CAST($6):VARCHAR NOT" + + " NULL, null:NULL)])\n" + + " LogicalFilter(condition=[IS NOT NULL($6)])\n" + + " LogicalProject(avg_sal=[$0], max_sal=[$1], min_sal=[$2], cnt=[$3]," + + " __row_id__=[$4], 
column=[$5], value=[CASE(=($5, 'avg_sal'), NUMBER_TO_STRING($0)," + + " =($5, 'max_sal'), NUMBER_TO_STRING($1), =($5, 'min_sal'), NUMBER_TO_STRING($2)," + + " =($5, 'cnt'), CAST($3):VARCHAR NOT NULL, null:NULL)])\n" + + " LogicalJoin(condition=[true], joinType=[inner])\n" + + " LogicalProject(avg_sal=[$0], max_sal=[$1], min_sal=[$2], cnt=[$3]," + + " __row_id__=[ROW_NUMBER() OVER ()])\n" + + " LogicalAggregate(group=[{}], avg_sal=[AVG($0)], max_sal=[MAX($0)]," + + " min_sal=[MIN($0)], cnt=[COUNT()])\n" + + " LogicalProject(SAL=[$5])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n" + + " LogicalValues(tuples=[[{ 'avg_sal' }, { 'max_sal' }, { 'min_sal' }, {" + + " 'cnt' }]])\n"; + verifyLogical(root, expectedLogical); + String expectedResult = + "column=avg_sal; row 1=2073.214285; row 2=null; row 3=null; row 4=null; row 5=null\n" + + "column=max_sal; row 1=5000.00; row 2=null; row 3=null; row 4=null; row 5=null\n" + + "column=cnt ; row 1=14; row 2=null; row 3=null; row 4=null; row 5=null\n" + + "column=min_sal; row 1=800.00; row 2=null; row 3=null; row 4=null; row 5=null\n"; + verifyResult(root, expectedResult); + + String expectedSparkSql = + "SELECT `column`, MAX(CASE WHEN `__row_id__` = 1 THEN CAST(`value` AS STRING) ELSE NULL" + + " END) `row 1`, MAX(CASE WHEN `__row_id__` = 2 THEN CAST(`value` AS STRING) ELSE NULL" + + " END) `row 2`, MAX(CASE WHEN `__row_id__` = 3 THEN CAST(`value` AS STRING) ELSE NULL" + + " END) `row 3`, MAX(CASE WHEN `__row_id__` = 4 THEN CAST(`value` AS STRING) ELSE NULL" + + " END) `row 4`, MAX(CASE WHEN `__row_id__` = 5 THEN CAST(`value` AS STRING) ELSE NULL" + + " END) `row 5`\n" + + "FROM (SELECT `t1`.`avg_sal`, `t1`.`max_sal`, `t1`.`min_sal`, `t1`.`cnt`," + + " `t1`.`__row_id__`, `t2`.`column`, CASE WHEN `t2`.`column` = 'avg_sal' THEN" + + " NUMBER_TO_STRING(`t1`.`avg_sal`) WHEN `t2`.`column` = 'max_sal' THEN" + + " NUMBER_TO_STRING(`t1`.`max_sal`) WHEN `t2`.`column` = 'min_sal' THEN" + + " NUMBER_TO_STRING(`t1`.`min_sal`) WHEN `t2`.`column` = 'cnt' THEN CAST(`t1`.`cnt` AS" + + " STRING) ELSE NULL END `value`\n" + + "FROM (SELECT AVG(`SAL`) `avg_sal`, MAX(`SAL`) `max_sal`, MIN(`SAL`) `min_sal`," + + " COUNT(*) `cnt`, ROW_NUMBER() OVER () `__row_id__`\n" + + "FROM `scott`.`EMP`) `t1`\n" + + "CROSS JOIN (VALUES ('avg_sal'),\n" + + "('max_sal'),\n" + + "('min_sal'),\n" + + "('cnt')) `t2` (`column`)) `t3`\n" + + "WHERE `t3`.`value` IS NOT NULL\n" + + "GROUP BY `column`"; + verifyPPLToSparkSQL(root, expectedSparkSql); + } + + @Test + public void testTransposeWithLimit() { + String ppl = "source=EMP | fields ENAME, COMM, JOB, SAL | transpose 3"; + RelNode root = getRelNode(ppl); + String expectedLogical = + "LogicalAggregate(group=[{0}], row 1=[MAX($1)], row 2=[MAX($2)], row 3=[MAX($3)])\n" + + " LogicalProject(column=[$5], $f7=[CASE(=($4, 1), CAST($6):VARCHAR NOT NULL," + + " null:NULL)], $f8=[CASE(=($4, 2), CAST($6):VARCHAR NOT NULL, null:NULL)]," + + " $f9=[CASE(=($4, 3), CAST($6):VARCHAR NOT NULL, null:NULL)])\n" + + " LogicalFilter(condition=[IS NOT NULL($6)])\n" + + " LogicalProject(ENAME=[$0], COMM=[$1], JOB=[$2], SAL=[$3], __row_id__=[$4]," + + " column=[$5], value=[CASE(=($5, 'ENAME'), CAST($0):VARCHAR NOT NULL, =($5, 'COMM')," + + " NUMBER_TO_STRING($1), =($5, 'JOB'), CAST($2):VARCHAR NOT NULL, =($5, 'SAL')," + + " NUMBER_TO_STRING($3), null:NULL)])\n" + + " LogicalJoin(condition=[true], joinType=[inner])\n" + + " LogicalProject(ENAME=[$1], COMM=[$6], JOB=[$2], SAL=[$5]," + + " __row_id__=[ROW_NUMBER() OVER ()])\n" + + " LogicalTableScan(table=[[scott, 
EMP]])\n" + + " LogicalValues(tuples=[[{ 'ENAME' }, { 'COMM' }, { 'JOB' }, { 'SAL' }]])\n"; + verifyLogical(root, expectedLogical); + String expectedResult = + "column=ENAME; row 1=SMITH; row 2=ALLEN; row 3=WARD\n" + + "column=COMM ; row 1=null; row 2=300.00; row 3=500.00\n" + + "column=JOB ; row 1=CLERK; row 2=SALESMAN; row 3=SALESMAN\n" + + "column=SAL ; row 1=800.00; row 2=1600.00; row 3=1250.00\n"; + verifyResult(root, expectedResult); + + String expectedSparkSql = + "SELECT `column`, MAX(CASE WHEN `__row_id__` = 1 THEN CAST(`value` AS STRING) ELSE NULL" + + " END) `row 1`, MAX(CASE WHEN `__row_id__` = 2 THEN CAST(`value` AS STRING) ELSE NULL" + + " END) `row 2`, MAX(CASE WHEN `__row_id__` = 3 THEN CAST(`value` AS STRING) ELSE NULL" + + " END) `row 3`\n" + + "FROM (SELECT `t`.`ENAME`, `t`.`COMM`, `t`.`JOB`, `t`.`SAL`, `t`.`__row_id__`," + + " `t0`.`column`, CASE WHEN `t0`.`column` = 'ENAME' THEN CAST(`t`.`ENAME` AS STRING)" + + " WHEN `t0`.`column` = 'COMM' THEN NUMBER_TO_STRING(`t`.`COMM`) WHEN `t0`.`column` =" + + " 'JOB' THEN CAST(`t`.`JOB` AS STRING) WHEN `t0`.`column` = 'SAL' THEN" + + " NUMBER_TO_STRING(`t`.`SAL`) ELSE NULL END `value`\n" + + "FROM (SELECT `ENAME`, `COMM`, `JOB`, `SAL`, ROW_NUMBER() OVER () `__row_id__`\n" + + "FROM `scott`.`EMP`) `t`\n" + + "CROSS JOIN (VALUES ('ENAME'),\n" + + "('COMM'),\n" + + "('JOB'),\n" + + "('SAL')) `t0` (`column`)) `t1`\n" + + "WHERE `t1`.`value` IS NOT NULL\n" + + "GROUP BY `column`"; + verifyPPLToSparkSQL(root, expectedSparkSql); + } + + @Test + public void testTransposeWithLimitColumnName() { + String ppl = + "source=EMP | fields ENAME, COMM, JOB, SAL | transpose 3 column_name='column_names'"; + RelNode root = getRelNode(ppl); + String expectedLogical = + "LogicalAggregate(group=[{0}], row 1=[MAX($1)], row 2=[MAX($2)], row 3=[MAX($3)])\n" + + " LogicalProject(column_names=[$5], $f7=[CASE(=($4, 1), CAST($6):VARCHAR NOT NULL," + + " null:NULL)], $f8=[CASE(=($4, 2), CAST($6):VARCHAR NOT NULL, null:NULL)]," + + " $f9=[CASE(=($4, 3), CAST($6):VARCHAR NOT NULL, null:NULL)])\n" + + " LogicalFilter(condition=[IS NOT NULL($6)])\n" + + " LogicalProject(ENAME=[$0], COMM=[$1], JOB=[$2], SAL=[$3], __row_id__=[$4]," + + " column_names=[$5], value=[CASE(=($5, 'ENAME'), CAST($0):VARCHAR NOT NULL, =($5," + + " 'COMM'), NUMBER_TO_STRING($1), =($5, 'JOB'), CAST($2):VARCHAR NOT NULL, =($5," + + " 'SAL'), NUMBER_TO_STRING($3), null:NULL)])\n" + + " LogicalJoin(condition=[true], joinType=[inner])\n" + + " LogicalProject(ENAME=[$1], COMM=[$6], JOB=[$2], SAL=[$5]," + + " __row_id__=[ROW_NUMBER() OVER ()])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n" + + " LogicalValues(tuples=[[{ 'ENAME' }, { 'COMM' }, { 'JOB' }, { 'SAL' }]])\n"; + verifyLogical(root, expectedLogical); + String expectedResult = + "column_names=ENAME; row 1=SMITH; row 2=ALLEN; row 3=WARD\n" + + "column_names=COMM ; row 1=null; row 2=300.00; row 3=500.00\n" + + "column_names=JOB ; row 1=CLERK; row 2=SALESMAN; row 3=SALESMAN\n" + + "column_names=SAL ; row 1=800.00; row 2=1600.00; row 3=1250.00\n"; + verifyResult(root, expectedResult); + + String expectedSparkSql = + "SELECT `column_names`, MAX(CASE WHEN `__row_id__` = 1 THEN CAST(`value` AS STRING) ELSE" + + " NULL END) `row 1`, MAX(CASE WHEN `__row_id__` = 2 THEN CAST(`value` AS STRING) ELSE" + + " NULL END) `row 2`, MAX(CASE WHEN `__row_id__` = 3 THEN CAST(`value` AS STRING) ELSE" + + " NULL END) `row 3`\n" + + "FROM (SELECT `t`.`ENAME`, `t`.`COMM`, `t`.`JOB`, `t`.`SAL`, `t`.`__row_id__`," + + " `t0`.`column_names`, CASE WHEN 
`t0`.`column_names` = 'ENAME' THEN CAST(`t`.`ENAME`" + + " AS STRING) WHEN `t0`.`column_names` = 'COMM' THEN NUMBER_TO_STRING(`t`.`COMM`) WHEN" + + " `t0`.`column_names` = 'JOB' THEN CAST(`t`.`JOB` AS STRING) WHEN `t0`.`column_names`" + + " = 'SAL' THEN NUMBER_TO_STRING(`t`.`SAL`) ELSE NULL END `value`\n" + + "FROM (SELECT `ENAME`, `COMM`, `JOB`, `SAL`, ROW_NUMBER() OVER () `__row_id__`\n" + + "FROM `scott`.`EMP`) `t`\n" + + "CROSS JOIN (VALUES ('ENAME'),\n" + + "('COMM'),\n" + + "('JOB'),\n" + + "('SAL')) `t0` (`column_names`)) `t1`\n" + + "WHERE `t1`.`value` IS NOT NULL\n" + + "GROUP BY `column_names`"; + verifyPPLToSparkSQL(root, expectedSparkSql); + } +} From 9bde0d0cf35b3428845529ef17f18b7b5cfd643f Mon Sep 17 00:00:00 2001 From: Asif Bashar Date: Mon, 29 Dec 2025 17:17:38 -0800 Subject: [PATCH 06/24] added tests Signed-off-by: Asif Bashar --- .../org/opensearch/sql/security/CrossClusterSearchIT.java | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/integ-test/src/test/java/org/opensearch/sql/security/CrossClusterSearchIT.java b/integ-test/src/test/java/org/opensearch/sql/security/CrossClusterSearchIT.java index 817beea84fe..9af85d64131 100644 --- a/integ-test/src/test/java/org/opensearch/sql/security/CrossClusterSearchIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/security/CrossClusterSearchIT.java @@ -278,9 +278,11 @@ public void testCrossClusterTranspose() throws IOException { JSONObject result = executeQuery( String.format( - "search source=%s | where firstname='Hattie' or firstname ='Nanette'|fields" - + " firstname,age,balance | transpose 3 column_name='column_names'", + "search source=%s | where firstname='Hattie' or firstname ='Nanette' or" + + " firstname='Dale'|sort firstname desc |fields firstname,age,balance |" + + " transpose 3 column_name='column_names'", TEST_INDEX_BANK_REMOTE)); + System.out.println(result.toString()); verifyDataRows( result, rows("Hattie", 36, 5686), rows("Nanette", 28, 32838), rows(null, 64, 38524)); } From 3080c78216ce4c987d78588e464063c85403cf2f Mon Sep 17 00:00:00 2001 From: Asif Bashar Date: Mon, 29 Dec 2025 17:28:08 -0800 Subject: [PATCH 07/24] added tests Signed-off-by: Asif Bashar --- .../org/opensearch/sql/security/CrossClusterSearchIT.java | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/integ-test/src/test/java/org/opensearch/sql/security/CrossClusterSearchIT.java b/integ-test/src/test/java/org/opensearch/sql/security/CrossClusterSearchIT.java index 9af85d64131..6bd34297b9b 100644 --- a/integ-test/src/test/java/org/opensearch/sql/security/CrossClusterSearchIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/security/CrossClusterSearchIT.java @@ -282,9 +282,11 @@ public void testCrossClusterTranspose() throws IOException { + " firstname='Dale'|sort firstname desc |fields firstname,age,balance |" + " transpose 3 column_name='column_names'", TEST_INDEX_BANK_REMOTE)); - System.out.println(result.toString()); verifyDataRows( - result, rows("Hattie", 36, 5686), rows("Nanette", 28, 32838), rows(null, 64, 38524)); + result, + rows("firstname", "Nanette", "Hattie", "Dale"), + rows("balance ", "32838", "5686", "4180"), + rows("age ", "28", "36", "33")); } @Test From 45be8ec0390cfdcd15868f9529e75378c08267b6 Mon Sep 17 00:00:00 2001 From: Asif Bashar Date: Mon, 29 Dec 2025 18:11:01 -0800 Subject: [PATCH 08/24] added tests Signed-off-by: Asif Bashar --- .../sql/calcite/CalciteRelNodeVisitor.java | 37 ++++++------------- .../ppl/calcite/CalcitePPLTransposeTest.java | 29 +++++++-------- 2 files 
changed, 25 insertions(+), 41 deletions(-) diff --git a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java index 9b9e7792c8b..2a5b4926cc8 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java +++ b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java @@ -701,7 +701,7 @@ public RelNode visitReverse( public RelNode visitTranspose( org.opensearch.sql.ast.tree.Transpose node, CalcitePlanContext context) { visitChildren(node, context); - java.util.Map arguments = node.getArguments(); + Integer maxRows = node.getMaxRows(); String columnName = node.getColumnName(); @@ -715,7 +715,7 @@ public RelNode visitTranspose( return currentNode; } - // Step 1: Add row numbers to identify each row uniquely + // Add row numbers to identify each row uniquely RexNode rowNumber = context .relBuilder @@ -725,7 +725,7 @@ public RelNode visitTranspose( .as("__row_id__"); context.relBuilder.projectPlus(rowNumber); - // Step 2: Unpivot the data - convert columns to rows + // Unpivot the data - convert columns to rows // Each field becomes a row with: row_id, column, value List measureColumns = ImmutableList.of("value"); List axisColumns = ImmutableList.of(columnName); @@ -761,23 +761,14 @@ public RelNode visitTranspose( valueMappings // field mappings ); - // Step 3: Pivot the data to transpose rows as columns + // Pivot the data to transpose rows as columns // Pivot on __row_id__ with column as the grouping key // This creates: column, row1, row2, row3, ... // Get unique row IDs to create column names - RelNode unpivotedData = context.relBuilder.build(); - - // Create aggregation calls for each row - we'll use MAX since we know each cell has only one - // value - List pivotColumns = new ArrayList<>(); - List pivotValues = new ArrayList<>(); - - // We need to determine how many rows we have to create the proper pivot structure - // For now, we'll use a different approach - use conditional aggregation - + // RelNode unpivotedData = context.relBuilder.build(); // Get the unpivoted data back on stack - context.relBuilder.push(unpivotedData); + // context.relBuilder.push(unpivotedData); // Create conditional aggregations for each row position // We'll use ROW_NUMBER to determine the row positions dynamically @@ -792,29 +783,23 @@ public RelNode visitTranspose( .as("__row_pos__"); context.relBuilder.projectPlus(rowPos); + /* // Now we'll use a different strategy - collect all values per column and then split them // Group by column and collect all values in order List groupByKeys = ImmutableList.of(context.relBuilder.field(columnName)); - - // For simplicity, we'll use a direct approach with conditional aggregations - // instead of STRING_AGG which may not be available - - // For simplicity, let's use a manual approach that works with the available Calcite operations // We'll create a query that manually builds the transposed result - - // Reset and try a simpler approach context.relBuilder.push(unpivotedData); - - // Let's manually build the pivot by creating conditional aggregations + // build the pivot by creating conditional aggregations // First, get distinct row IDs - context.relBuilder.aggregate( + without distict row id , check if it works + context.relBuilder.aggregate( context.relBuilder.groupKey(), context.relBuilder.max(context.relBuilder.field("__row_id__")).as("max_row_id")); // Go back to unpivoted data and create the pivot manually context.relBuilder.clear(); 
context.relBuilder.push(unpivotedData); - + */ // Create aggregation calls for each possible row position List pivotAggCalls = new ArrayList<>(); diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLTransposeTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLTransposeTest.java index b12721d9578..3dd0ac3ceee 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLTransposeTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLTransposeTest.java @@ -22,10 +22,10 @@ public void testSimpleCountWithTranspose() { String expectedLogical = "LogicalAggregate(group=[{0}], row 1=[MAX($1)], row 2=[MAX($2)], row 3=[MAX($3)], row" + " 4=[MAX($4)], row 5=[MAX($5)])\n" - + " LogicalProject(column=[$2], $f4=[CASE(=($1, 1), CAST($3):VARCHAR NOT NULL," - + " null:NULL)], $f5=[CASE(=($1, 2), CAST($3):VARCHAR NOT NULL, null:NULL)]," - + " $f6=[CASE(=($1, 3), CAST($3):VARCHAR NOT NULL, null:NULL)], $f7=[CASE(=($1, 4)," - + " CAST($3):VARCHAR NOT NULL, null:NULL)], $f8=[CASE(=($1, 5), CAST($3):VARCHAR NOT" + + " LogicalProject(column=[$2], $f5=[CASE(=($1, 1), CAST($3):VARCHAR NOT NULL," + + " null:NULL)], $f6=[CASE(=($1, 2), CAST($3):VARCHAR NOT NULL, null:NULL)]," + + " $f7=[CASE(=($1, 3), CAST($3):VARCHAR NOT NULL, null:NULL)], $f8=[CASE(=($1, 4)," + + " CAST($3):VARCHAR NOT NULL, null:NULL)], $f9=[CASE(=($1, 5), CAST($3):VARCHAR NOT" + " NULL, null:NULL)])\n" + " LogicalFilter(condition=[IS NOT NULL($3)])\n" + " LogicalProject(c=[$0], __row_id__=[$1], column=[$2], value=[CASE(=($2, 'c')," @@ -65,10 +65,10 @@ public void testMultipleAggregatesWithAliasesTranspose() { String expectedLogical = "LogicalAggregate(group=[{0}], row 1=[MAX($1)], row 2=[MAX($2)], row 3=[MAX($3)], row" + " 4=[MAX($4)], row 5=[MAX($5)])\n" - + " LogicalProject(column=[$5], $f7=[CASE(=($4, 1), CAST($6):VARCHAR NOT NULL," - + " null:NULL)], $f8=[CASE(=($4, 2), CAST($6):VARCHAR NOT NULL, null:NULL)]," - + " $f9=[CASE(=($4, 3), CAST($6):VARCHAR NOT NULL, null:NULL)], $f10=[CASE(=($4, 4)," - + " CAST($6):VARCHAR NOT NULL, null:NULL)], $f11=[CASE(=($4, 5), CAST($6):VARCHAR NOT" + + " LogicalProject(column=[$5], $f8=[CASE(=($4, 1), CAST($6):VARCHAR NOT NULL," + + " null:NULL)], $f9=[CASE(=($4, 2), CAST($6):VARCHAR NOT NULL, null:NULL)]," + + " $f10=[CASE(=($4, 3), CAST($6):VARCHAR NOT NULL, null:NULL)], $f11=[CASE(=($4, 4)," + + " CAST($6):VARCHAR NOT NULL, null:NULL)], $f12=[CASE(=($4, 5), CAST($6):VARCHAR NOT" + " NULL, null:NULL)])\n" + " LogicalFilter(condition=[IS NOT NULL($6)])\n" + " LogicalProject(avg_sal=[$0], max_sal=[$1], min_sal=[$2], cnt=[$3]," @@ -123,9 +123,9 @@ public void testTransposeWithLimit() { RelNode root = getRelNode(ppl); String expectedLogical = "LogicalAggregate(group=[{0}], row 1=[MAX($1)], row 2=[MAX($2)], row 3=[MAX($3)])\n" - + " LogicalProject(column=[$5], $f7=[CASE(=($4, 1), CAST($6):VARCHAR NOT NULL," - + " null:NULL)], $f8=[CASE(=($4, 2), CAST($6):VARCHAR NOT NULL, null:NULL)]," - + " $f9=[CASE(=($4, 3), CAST($6):VARCHAR NOT NULL, null:NULL)])\n" + + " LogicalProject(column=[$5], $f8=[CASE(=($4, 1), CAST($6):VARCHAR NOT NULL," + + " null:NULL)], $f9=[CASE(=($4, 2), CAST($6):VARCHAR NOT NULL, null:NULL)]," + + " $f10=[CASE(=($4, 3), CAST($6):VARCHAR NOT NULL, null:NULL)])\n" + " LogicalFilter(condition=[IS NOT NULL($6)])\n" + " LogicalProject(ENAME=[$0], COMM=[$1], JOB=[$2], SAL=[$3], __row_id__=[$4]," + " column=[$5], value=[CASE(=($5, 'ENAME'), CAST($0):VARCHAR NOT NULL, =($5, 'COMM')," @@ -136,7 +136,6 @@ public void 
testTransposeWithLimit() { + " __row_id__=[ROW_NUMBER() OVER ()])\n" + " LogicalTableScan(table=[[scott, EMP]])\n" + " LogicalValues(tuples=[[{ 'ENAME' }, { 'COMM' }, { 'JOB' }, { 'SAL' }]])\n"; - verifyLogical(root, expectedLogical); String expectedResult = "column=ENAME; row 1=SMITH; row 2=ALLEN; row 3=WARD\n" + "column=COMM ; row 1=null; row 2=300.00; row 3=500.00\n" @@ -172,9 +171,9 @@ public void testTransposeWithLimitColumnName() { RelNode root = getRelNode(ppl); String expectedLogical = "LogicalAggregate(group=[{0}], row 1=[MAX($1)], row 2=[MAX($2)], row 3=[MAX($3)])\n" - + " LogicalProject(column_names=[$5], $f7=[CASE(=($4, 1), CAST($6):VARCHAR NOT NULL," - + " null:NULL)], $f8=[CASE(=($4, 2), CAST($6):VARCHAR NOT NULL, null:NULL)]," - + " $f9=[CASE(=($4, 3), CAST($6):VARCHAR NOT NULL, null:NULL)])\n" + + " LogicalProject(column_names=[$5], $f8=[CASE(=($4, 1), CAST($6):VARCHAR NOT NULL," + + " null:NULL)], $f9=[CASE(=($4, 2), CAST($6):VARCHAR NOT NULL, null:NULL)]," + + " $f10=[CASE(=($4, 3), CAST($6):VARCHAR NOT NULL, null:NULL)])\n" + " LogicalFilter(condition=[IS NOT NULL($6)])\n" + " LogicalProject(ENAME=[$0], COMM=[$1], JOB=[$2], SAL=[$3], __row_id__=[$4]," + " column_names=[$5], value=[CASE(=($5, 'ENAME'), CAST($0):VARCHAR NOT NULL, =($5," From b0db7ba4291d01af024f335b6079dd7fd9037248 Mon Sep 17 00:00:00 2001 From: Asif Bashar Date: Mon, 29 Dec 2025 18:14:47 -0800 Subject: [PATCH 09/24] added tests Signed-off-by: Asif Bashar --- .../sql/calcite/CalciteRelNodeVisitor.java | 30 ++----------------- 1 file changed, 2 insertions(+), 28 deletions(-) diff --git a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java index 2a5b4926cc8..5f122c239c2 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java +++ b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java @@ -701,16 +701,12 @@ public RelNode visitReverse( public RelNode visitTranspose( org.opensearch.sql.ast.tree.Transpose node, CalcitePlanContext context) { visitChildren(node, context); - Integer maxRows = node.getMaxRows(); - String columnName = node.getColumnName(); - // Get the current schema to transpose RelNode currentNode = context.relBuilder.peek(); List fieldNames = currentNode.getRowType().getFieldNames(); List fields = currentNode.getRowType().getFieldList(); - if (fieldNames.isEmpty()) { return currentNode; } @@ -725,7 +721,7 @@ public RelNode visitTranspose( .as("__row_id__"); context.relBuilder.projectPlus(rowNumber); - // Unpivot the data - convert columns to rows + // Unpivot the data - convert columns to rows // Each field becomes a row with: row_id, column, value List measureColumns = ImmutableList.of("value"); List axisColumns = ImmutableList.of(columnName); @@ -761,15 +757,10 @@ public RelNode visitTranspose( valueMappings // field mappings ); - // Pivot the data to transpose rows as columns + // Pivot the data to transpose rows as columns // Pivot on __row_id__ with column as the grouping key // This creates: column, row1, row2, row3, ... 
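     // For reference, a minimal sketch of the plan this step produces (assuming
     // maxRows = 3; "unpivoted_input" is a placeholder name for the relation built
     // above), matching the shape of the Spark SQL asserted in CalcitePPLTransposeTest:
     //
     //   SELECT "column",
     //          MAX(CASE WHEN __row_id__ = 1 THEN value END) AS "row 1",
     //          MAX(CASE WHEN __row_id__ = 2 THEN value END) AS "row 2",
     //          MAX(CASE WHEN __row_id__ = 3 THEN value END) AS "row 3"
     //   FROM unpivoted_input
     //   GROUP BY "column"
     //
     // MAX works as the pick-one aggregate here because the unpivot leaves at most
     // one value per (column, __row_id__) pair.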
- // Get unique row IDs to create column names - // RelNode unpivotedData = context.relBuilder.build(); - // Get the unpivoted data back on stack - // context.relBuilder.push(unpivotedData); - // Create conditional aggregations for each row position // We'll use ROW_NUMBER to determine the row positions dynamically RexNode rowPos = @@ -783,23 +774,6 @@ public RelNode visitTranspose( .as("__row_pos__"); context.relBuilder.projectPlus(rowPos); - /* - // Now we'll use a different strategy - collect all values per column and then split them - // Group by column and collect all values in order - List groupByKeys = ImmutableList.of(context.relBuilder.field(columnName)); - // We'll create a query that manually builds the transposed result - context.relBuilder.push(unpivotedData); - // build the pivot by creating conditional aggregations - // First, get distinct row IDs - without distict row id , check if it works - context.relBuilder.aggregate( - context.relBuilder.groupKey(), - context.relBuilder.max(context.relBuilder.field("__row_id__")).as("max_row_id")); - - // Go back to unpivoted data and create the pivot manually - context.relBuilder.clear(); - context.relBuilder.push(unpivotedData); - */ // Create aggregation calls for each possible row position List pivotAggCalls = new ArrayList<>(); From 4db8d2aaf117558d4e70622ff07f967110ba637b Mon Sep 17 00:00:00 2001 From: Asif Bashar Date: Mon, 29 Dec 2025 18:36:53 -0800 Subject: [PATCH 10/24] added tests Signed-off-by: Asif Bashar --- ppl/src/main/antlr/OpenSearchPPLParser.g4 | 1 - 1 file changed, 1 deletion(-) diff --git a/ppl/src/main/antlr/OpenSearchPPLParser.g4 b/ppl/src/main/antlr/OpenSearchPPLParser.g4 index 57f935846b7..03b0f71e148 100644 --- a/ppl/src/main/antlr/OpenSearchPPLParser.g4 +++ b/ppl/src/main/antlr/OpenSearchPPLParser.g4 @@ -1670,6 +1670,5 @@ searchableKeyWord | COL | TRANSPOSE | COLUMN_NAME - | NUMBER ; From 914ad4e3c06abb8910f679354bbe359783aa2aae Mon Sep 17 00:00:00 2001 From: Asif Bashar Date: Mon, 29 Dec 2025 19:55:56 -0800 Subject: [PATCH 11/24] added tests Signed-off-by: Asif Bashar --- .../calcite/explain_transpose.yaml | 20 +++++++++++++++++++ 1 file changed, 20 insertions(+) create mode 100644 integ-test/src/test/resources/expectedOutput/calcite/explain_transpose.yaml diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_transpose.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_transpose.yaml new file mode 100644 index 00000000000..98a833a4bd5 --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_transpose.yaml @@ -0,0 +1,20 @@ +calcite: + logical: | + LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalAggregate(group=[{0}], row 1=[MAX($1)], row 2=[MAX($2)], row 3=[MAX($3)], row 4=[MAX($4)]) + LogicalProject(column_names=[$18], $f21=[CASE(=($17, 1), CAST($19):VARCHAR NOT NULL, null:NULL)], $f22=[CASE(=($17, 2), CAST($19):VARCHAR NOT NULL, null:NULL)], $f23=[CASE(=($17, 3), CAST($19):VARCHAR NOT NULL, null:NULL)], $f24=[CASE(=($17, 4), CAST($19):VARCHAR NOT NULL, null:NULL)]) + LogicalFilter(condition=[IS NOT NULL($19)]) + LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], _id=[$11], _index=[$12], _score=[$13], _maxscore=[$14], _sort=[$15], _routing=[$16], __row_id__=[$17], column_names=[$18], value=[CASE(=($18, 'account_number'), CAST($0):VARCHAR NOT NULL, =($18, 'firstname'), CAST($1):VARCHAR NOT NULL, =($18, 'address'), 
CAST($2):VARCHAR NOT NULL, =($18, 'balance'), CAST($3):VARCHAR NOT NULL, =($18, 'gender'), CAST($4):VARCHAR NOT NULL, =($18, 'city'), CAST($5):VARCHAR NOT NULL, =($18, 'employer'), CAST($6):VARCHAR NOT NULL, =($18, 'state'), CAST($7):VARCHAR NOT NULL, =($18, 'age'), CAST($8):VARCHAR NOT NULL, =($18, 'email'), CAST($9):VARCHAR NOT NULL, =($18, 'lastname'), CAST($10):VARCHAR NOT NULL, =($18, '_id'), CAST($11):VARCHAR NOT NULL, =($18, '_index'), CAST($12):VARCHAR NOT NULL, =($18, '_score'), NUMBER_TO_STRING($13), =($18, '_maxscore'), NUMBER_TO_STRING($14), =($18, '_sort'), CAST($15):VARCHAR NOT NULL, =($18, '_routing'), CAST($16):VARCHAR NOT NULL, null:NULL)]) + LogicalJoin(condition=[true], joinType=[inner]) + LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], _id=[$11], _index=[$12], _score=[$13], _maxscore=[$14], _sort=[$15], _routing=[$16], __row_id__=[ROW_NUMBER() OVER ()]) + LogicalSort(fetch=[5]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) + LogicalValues(tuples=[[{ 'account_number' }, { 'firstname' }, { 'address' }, { 'balance' }, { 'gender' }, { 'city' }, { 'employer' }, { 'state' }, { 'age' }, { 'email' }, { 'lastname' }, { '_id' }, { '_index' }, { '_score' }, { '_maxscore' }, { '_sort' }, { '_routing' }]]) + physical: | + EnumerableLimit(fetch=[10000]) + EnumerableAggregate(group=[{0}], row 1=[MAX($1) FILTER $2], row 2=[MAX($3) FILTER $4], row 3=[MAX($5) FILTER $6], row 4=[MAX($7) FILTER $8]) + EnumerableCalc(expr#0..18=[{inputs}], expr#19=['account_number'], expr#20=[=($t18, $t19)], expr#21=[CAST($t0):VARCHAR NOT NULL], expr#22=['firstname'], expr#23=[=($t18, $t22)], expr#24=[CAST($t1):VARCHAR NOT NULL], expr#25=['address'], expr#26=[=($t18, $t25)], expr#27=[CAST($t2):VARCHAR NOT NULL], expr#28=['balance'], expr#29=[=($t18, $t28)], expr#30=[CAST($t3):VARCHAR NOT NULL], expr#31=['gender'], expr#32=[=($t18, $t31)], expr#33=[CAST($t4):VARCHAR NOT NULL], expr#34=['city'], expr#35=[=($t18, $t34)], expr#36=[CAST($t5):VARCHAR NOT NULL], expr#37=['employer'], expr#38=[=($t18, $t37)], expr#39=[CAST($t6):VARCHAR NOT NULL], expr#40=['state'], expr#41=[=($t18, $t40)], expr#42=[CAST($t7):VARCHAR NOT NULL], expr#43=['age'], expr#44=[=($t18, $t43)], expr#45=[CAST($t8):VARCHAR NOT NULL], expr#46=['email'], expr#47=[=($t18, $t46)], expr#48=[CAST($t9):VARCHAR NOT NULL], expr#49=['lastname'], expr#50=[=($t18, $t49)], expr#51=[CAST($t10):VARCHAR NOT NULL], expr#52=['_id'], expr#53=[=($t18, $t52)], expr#54=[CAST($t11):VARCHAR NOT NULL], expr#55=['_index'], expr#56=[=($t18, $t55)], expr#57=[CAST($t12):VARCHAR NOT NULL], expr#58=['_score'], expr#59=[=($t18, $t58)], expr#60=[NUMBER_TO_STRING($t13)], expr#61=['_maxscore'], expr#62=[=($t18, $t61)], expr#63=[NUMBER_TO_STRING($t14)], expr#64=['_sort'], expr#65=[=($t18, $t64)], expr#66=[CAST($t15):VARCHAR NOT NULL], expr#67=['_routing'], expr#68=[=($t18, $t67)], expr#69=[CAST($t16):VARCHAR NOT NULL], expr#70=[null:NULL], expr#71=[CASE($t20, $t21, $t23, $t24, $t26, $t27, $t29, $t30, $t32, $t33, $t35, $t36, $t38, $t39, $t41, $t42, $t44, $t45, $t47, $t48, $t50, $t51, $t53, $t54, $t56, $t57, $t59, $t60, $t62, $t63, $t65, $t66, $t68, $t69, $t70)], expr#72=[CAST($t71):VARCHAR NOT NULL], expr#73=[1], expr#74=[=($t17, $t73)], expr#75=[2], expr#76=[=($t17, $t75)], expr#77=[3], expr#78=[=($t17, $t77)], expr#79=[4], expr#80=[=($t17, $t79)], column_names=[$t18], value=[$t72], $f6=[$t74], value0=[$t72], $f8=[$t76], 
value1=[$t72], $f10=[$t78], value2=[$t72], $f12=[$t80]) + EnumerableNestedLoopJoin(condition=[CASE(SEARCH($18, Sarg['_id':CHAR(14), '_index':CHAR(14), 'account_number', 'address':CHAR(14), 'age':CHAR(14), 'balance':CHAR(14), 'city':CHAR(14), 'email':CHAR(14), 'employer':CHAR(14), 'firstname':CHAR(14), 'gender':CHAR(14), 'lastname':CHAR(14), 'state':CHAR(14)]:CHAR(14)), true, =($18, '_score'), IS NOT NULL(NUMBER_TO_STRING($13)), =($18, '_maxscore'), IS NOT NULL(NUMBER_TO_STRING($14)), SEARCH($18, Sarg['_routing', '_sort':CHAR(8)]:CHAR(8)), true, false)], joinType=[inner]) + EnumerableWindow(window#0=[window(rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[LIMIT->5], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":5,"timeout":"1m"}, requestedTotalSize=5, pageSize=null, startFrom=0)]) + EnumerableValues(tuples=[[{ 'account_number' }, { 'firstname' }, { 'address' }, { 'balance' }, { 'gender' }, { 'city' }, { 'employer' }, { 'state' }, { 'age' }, { 'email' }, { 'lastname' }, { '_id' }, { '_index' }, { '_score' }, { '_maxscore' }, { '_sort' }, { '_routing' }]]) From 1cf9083c85c5a0770ea136efa44d7415da7e875f Mon Sep 17 00:00:00 2001 From: Asif Bashar Date: Fri, 2 Jan 2026 10:38:11 -0800 Subject: [PATCH 12/24] added more validations Signed-off-by: Asif Bashar --- .../java/org/opensearch/sql/ast/tree/Transpose.java | 10 ++++++++-- .../opensearch/sql/calcite/CalciteRelNodeVisitor.java | 5 ++++- .../org/opensearch/sql/ppl/utils/ArgumentFactory.java | 6 ++++-- .../sql/ppl/utils/PPLQueryDataAnonymizer.java | 8 +++++++- 4 files changed, 23 insertions(+), 6 deletions(-) diff --git a/core/src/main/java/org/opensearch/sql/ast/tree/Transpose.java b/core/src/main/java/org/opensearch/sql/ast/tree/Transpose.java index 2e403b6d3ed..64d321506ac 100644 --- a/core/src/main/java/org/opensearch/sql/ast/tree/Transpose.java +++ b/core/src/main/java/org/opensearch/sql/ast/tree/Transpose.java @@ -18,13 +18,19 @@ @EqualsAndHashCode(callSuper = false) @RequiredArgsConstructor public class Transpose extends UnresolvedPlan { - private final java.util.Map arguments; + private final @NonNull java.util.Map arguments; private UnresolvedPlan child; public Integer getMaxRows() { Integer maxRows = 5; if (arguments.containsKey("number")) { - maxRows = Integer.parseInt(arguments.get("number").getValue().toString()); + try { + maxRows = Integer.parseInt(arguments.get("number").getValue().toString()); + } catch (NumberFormatException e) { + // log warning and use default + maxRows = 5; + } + } return maxRows; } diff --git a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java index 5f122c239c2..8183fce6f29 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java +++ b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java @@ -702,6 +702,9 @@ public RelNode visitTranspose( org.opensearch.sql.ast.tree.Transpose node, CalcitePlanContext context) { visitChildren(node, context); Integer maxRows = node.getMaxRows(); + if (maxRows == null || maxRows <= 0) { + throw new IllegalArgumentException("maxRows must be a positive integer"); + } String columnName = node.getColumnName(); // Get the current schema to transpose RelNode currentNode = context.relBuilder.peek(); @@ -783,7 +786,7 @@ public RelNode visitTranspose( context.relBuilder.call( SqlStdOperatorTable.CASE, 
context.relBuilder.equals( - context.relBuilder.field("__row_id__"), context.relBuilder.literal(i)), + context.relBuilder.field("__row_pos__"), context.relBuilder.literal(i)), context.relBuilder.field("value"), context.relBuilder.literal(null)); diff --git a/ppl/src/main/java/org/opensearch/sql/ppl/utils/ArgumentFactory.java b/ppl/src/main/java/org/opensearch/sql/ppl/utils/ArgumentFactory.java index b4936f46816..b820b1a1fa7 100644 --- a/ppl/src/main/java/org/opensearch/sql/ppl/utils/ArgumentFactory.java +++ b/ppl/src/main/java/org/opensearch/sql/ppl/utils/ArgumentFactory.java @@ -317,6 +317,9 @@ public static Map getArgumentList( transposeCommandContext.transposeParameter()) { if (ctx.COLUMN_NAME() != null) { + if (ctx.stringLiteral() == null) { + throw new IllegalArgumentException("COLUMN_NAME requires a string literal value"); + } Literal columnName = getArgumentValue(ctx.stringLiteral()); arguments.put("columnName", new Argument("columnName", columnName)); } else if (ctx.number != null) { @@ -325,8 +328,7 @@ public static Map getArgumentList( } else { throw new IllegalArgumentException( String.format( - "A parameter of transpose must be a int limit, column_name, header_field, or" - + " include_empty, got %s", + "A parameter of transpose must be a int limit, column_name , got %s", ctx)); } } diff --git a/ppl/src/main/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizer.java b/ppl/src/main/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizer.java index 98cbac2ca3d..fb889290996 100644 --- a/ppl/src/main/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizer.java +++ b/ppl/src/main/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizer.java @@ -639,12 +639,18 @@ public String visitTrendline(Trendline node, String context) { @Override public String visitTranspose(Transpose node, String context) { + if (node.getChild().isEmpty()) { + return "source=*** | transpose"; + } String child = node.getChild().get(0).accept(this, context); StringBuilder anonymized = new StringBuilder(StringUtils.format("%s | transpose", child)); java.util.Map arguments = node.getArguments(); if (arguments.containsKey("number")) { - anonymized.append(StringUtils.format(" %s", arguments.get("number").getValue())); + Argument numberArg = arguments.get("number"); + if (numberArg != null) { + anonymized.append(StringUtils.format(" %s", numberArg.getValue())); + } } if (arguments.containsKey("columnName")) { anonymized.append(StringUtils.format(" %s=***", "column_name")); From 913ec3090e553b53eb9ee5c43195bb230a9203d5 Mon Sep 17 00:00:00 2001 From: Asif Bashar Date: Fri, 2 Jan 2026 11:55:45 -0800 Subject: [PATCH 13/24] added validation Signed-off-by: Asif Bashar --- .../org/opensearch/sql/ast/tree/Transpose.java | 15 +++++++-------- .../sql/calcite/CalciteRelNodeVisitor.java | 4 ++-- .../opensearch/sql/ppl/utils/ArgumentFactory.java | 5 ++--- .../sql/ppl/utils/PPLQueryDataAnonymizer.java | 10 +++++----- 4 files changed, 16 insertions(+), 18 deletions(-) diff --git a/core/src/main/java/org/opensearch/sql/ast/tree/Transpose.java b/core/src/main/java/org/opensearch/sql/ast/tree/Transpose.java index 64d321506ac..344787a37c1 100644 --- a/core/src/main/java/org/opensearch/sql/ast/tree/Transpose.java +++ b/core/src/main/java/org/opensearch/sql/ast/tree/Transpose.java @@ -18,19 +18,18 @@ @EqualsAndHashCode(callSuper = false) @RequiredArgsConstructor public class Transpose extends UnresolvedPlan { - private final @NonNull java.util.Map arguments; + private final @NonNull java.util.Map arguments; private UnresolvedPlan 
child; public Integer getMaxRows() { Integer maxRows = 5; if (arguments.containsKey("number")) { - try { - maxRows = Integer.parseInt(arguments.get("number").getValue().toString()); - } catch (NumberFormatException e) { - // log warning and use default - maxRows = 5; - } - + try { + maxRows = Integer.parseInt(arguments.get("number").getValue().toString()); + } catch (NumberFormatException e) { + // log warning and use default + maxRows = 5; + } } return maxRows; } diff --git a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java index 8183fce6f29..736dad91114 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java +++ b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java @@ -703,7 +703,7 @@ public RelNode visitTranspose( visitChildren(node, context); Integer maxRows = node.getMaxRows(); if (maxRows == null || maxRows <= 0) { - throw new IllegalArgumentException("maxRows must be a positive integer"); + throw new IllegalArgumentException("maxRows must be a positive integer"); } String columnName = node.getColumnName(); // Get the current schema to transpose @@ -786,7 +786,7 @@ public RelNode visitTranspose( context.relBuilder.call( SqlStdOperatorTable.CASE, context.relBuilder.equals( - context.relBuilder.field("__row_pos__"), context.relBuilder.literal(i)), + context.relBuilder.field("__row_id__"), context.relBuilder.literal(i)), context.relBuilder.field("value"), context.relBuilder.literal(null)); diff --git a/ppl/src/main/java/org/opensearch/sql/ppl/utils/ArgumentFactory.java b/ppl/src/main/java/org/opensearch/sql/ppl/utils/ArgumentFactory.java index b820b1a1fa7..72090e2f069 100644 --- a/ppl/src/main/java/org/opensearch/sql/ppl/utils/ArgumentFactory.java +++ b/ppl/src/main/java/org/opensearch/sql/ppl/utils/ArgumentFactory.java @@ -318,7 +318,7 @@ public static Map getArgumentList( if (ctx.COLUMN_NAME() != null) { if (ctx.stringLiteral() == null) { - throw new IllegalArgumentException("COLUMN_NAME requires a string literal value"); + throw new IllegalArgumentException("COLUMN_NAME requires a string literal value"); } Literal columnName = getArgumentValue(ctx.stringLiteral()); arguments.put("columnName", new Argument("columnName", columnName)); @@ -328,8 +328,7 @@ public static Map getArgumentList( } else { throw new IllegalArgumentException( String.format( - "A parameter of transpose must be a int limit, column_name , got %s", - ctx)); + "A parameter of transpose must be a int limit, column_name , got %s", ctx)); } } return arguments; diff --git a/ppl/src/main/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizer.java b/ppl/src/main/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizer.java index fb889290996..41656061611 100644 --- a/ppl/src/main/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizer.java +++ b/ppl/src/main/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizer.java @@ -640,17 +640,17 @@ public String visitTrendline(Trendline node, String context) { @Override public String visitTranspose(Transpose node, String context) { if (node.getChild().isEmpty()) { - return "source=*** | transpose"; + return "source=*** | transpose"; } String child = node.getChild().get(0).accept(this, context); StringBuilder anonymized = new StringBuilder(StringUtils.format("%s | transpose", child)); java.util.Map arguments = node.getArguments(); if (arguments.containsKey("number")) { - Argument numberArg = arguments.get("number"); - if (numberArg 
!= null) { - anonymized.append(StringUtils.format(" %s", numberArg.getValue())); - } + Argument numberArg = arguments.get("number"); + if (numberArg != null) { + anonymized.append(StringUtils.format(" %s", numberArg.getValue())); + } } if (arguments.containsKey("columnName")) { anonymized.append(StringUtils.format(" %s=***", "column_name")); From 0ee835784f331b77a152f8d4204910811214e378 Mon Sep 17 00:00:00 2001 From: Asif Bashar Date: Fri, 2 Jan 2026 15:12:42 -0800 Subject: [PATCH 14/24] index.md formatting fix Signed-off-by: Asif Bashar --- docs/user/ppl/index.md | 96 +++++++++++++++++++++--------------------- 1 file changed, 48 insertions(+), 48 deletions(-) diff --git a/docs/user/ppl/index.md b/docs/user/ppl/index.md index 42a9c8237a9..05b8ea125cc 100644 --- a/docs/user/ppl/index.md +++ b/docs/user/ppl/index.md @@ -11,7 +11,7 @@ The query start with search command and then flowing a set of command delimited for example, the following query retrieve firstname and lastname from accounts if age large than 18. -```ppl ignore +``` source=accounts | where age > 18 | fields firstname, lastname @@ -35,53 +35,53 @@ source=accounts The following commands are available in PPL: **Note:** Experimental commands are ready for use, but specific parameters may change based on feedback. -| Command Name | Version Introduced | Current Status | Command Description | -|----------------------------------------------------|--------------------|--------------------------|-------------------------------------------------------------------------| -| [search command](cmd/search.md) | 1.0 | stable (since 1.0) | Retrieve documents from the index. | -| [where command](cmd/where.md) | 1.0 | stable (since 1.0) | Filter the search result using boolean expressions. | -| [subquery command](cmd/subquery.md) | 3.0 | experimental (since 3.0) | Embed one PPL query inside another for complex filtering and data retrieval operations. | -| [fields command](cmd/fields.md) | 1.0 | stable (since 1.0) | Keep or remove fields from the search result. | -| [rename command](cmd/rename.md) | 1.0 | stable (since 1.0) | Rename one or more fields in the search result. | -| [eval command](cmd/eval.md) | 1.0 | stable (since 1.0) | Evaluate an expression and append the result to the search result. | -| [replace command](cmd/replace.md) | 3.4 | experimental (since 3.4) | Replace text in one or more fields in the search result | -| [fillnull command](cmd/fillnull.md) | 3.0 | experimental (since 3.0) | Fill null with provided value in one or more fields in the search result. | -| [expand command](cmd/expand.md) | 3.1 | experimental (since 3.1) | Transform a single document into multiple documents by expanding a nested array field. | -| [flatten command](cmd/flatten.md) | 3.1 | experimental (since 3.1) | Flatten a struct or an object field into separate fields in a document. | -| [table command](cmd/table.md) | 3.3 | experimental (since 3.3) | Keep or remove fields from the search result using enhanced syntax options. | -| [stats command](cmd/stats.md) | 1.0 | stable (since 1.0) | Calculate aggregation from search results. | -| [eventstats command](cmd/eventstats.md) | 3.1 | experimental (since 3.1) | Calculate aggregation statistics and add them as new fields to each event. | -| [streamstats command](cmd/streamstats.md) | 3.4 | experimental (since 3.4) | Calculate cumulative or rolling statistics as events are processed in order. | -| [bin command](cmd/bin.md) | 3.3 | experimental (since 3.3) | Group numeric values into buckets of equal intervals. 
| -| [timechart command](cmd/timechart.md) | 3.3 | experimental (since 3.3) | Create time-based charts and visualizations. | -| [chart command](cmd/chart.md) | 3.4 | experimental (since 3.4) | Apply statistical aggregations to search results and group the data for visualizations. | -| [trendline command](cmd/trendline.md) | 3.0 | experimental (since 3.0) | Calculate moving averages of fields. | -| [sort command](cmd/sort.md) | 1.0 | stable (since 1.0) | Sort all the search results by the specified fields. | -| [reverse command](cmd/reverse.md) | 3.2 | experimental (since 3.2) | Reverse the display order of search results. | -| [head command](cmd/head.md) | 1.0 | stable (since 1.0) | Return the first N number of specified results after an optional offset in search order. | -| [dedup command](cmd/dedup.md) | 1.0 | stable (since 1.0) | Remove identical documents defined by the field from the search result. | -| [top command](cmd/top.md) | 1.0 | stable (since 1.0) | Find the most common tuple of values of all fields in the field list. | -| [rare command](cmd/rare.md) | 1.0 | stable (since 1.0) | Find the least common tuple of values of all fields in the field list. | -| [parse command](cmd/parse.md) | 1.3 | stable (since 1.3) | Parse a text field with a regular expression and append the result to the search result. | -| [grok command](cmd/grok.md) | 2.4 | stable (since 2.4) | Parse a text field with a grok pattern and append the results to the search result. | -| [rex command](cmd/rex.md) | 3.3 | experimental (since 3.3) | Extract fields from a raw text field using regular expression named capture groups. | -| [regex command](cmd/regex.md) | 3.3 | experimental (since 3.3) | Filter search results by matching field values against a regular expression pattern. | -| [spath command](cmd/spath.md) | 3.3 | experimental (since 3.3) | Extract fields from structured text data. | -| [patterns command](cmd/patterns.md) | 2.4 | stable (since 2.4) | Extract log patterns from a text field and append the results to the search result. | -| [join command](cmd/join.md) | 3.0 | stable (since 3.0) | Combine two datasets together. | -| [append command](cmd/append.md) | 3.3 | experimental (since 3.3) | Append the result of a sub-search to the bottom of the input search results. | -| [appendcol command](cmd/appendcol.md) | 3.1 | experimental (since 3.1) | Append the result of a sub-search and attach it alongside the input search results. | -| [lookup command](cmd/lookup.md) | 3.0 | experimental (since 3.0) | Add or replace data from a lookup index. | -| [multisearch command](cmd/multisearch.md) | 3.4 | experimental (since 3.4) | Execute multiple search queries and combine their results. | -| [ml command](cmd/ml.md) | 2.5 | stable (since 2.5) | Apply machine learning algorithms to analyze data. | -| [kmeans command](cmd/kmeans.md) | 1.3 | stable (since 1.3) | Apply the kmeans algorithm on the search result returned by a PPL command. | -| [ad command](cmd/ad.md) | 1.3 | deprecated (since 2.5) | Apply Random Cut Forest algorithm on the search result returned by a PPL command. | -| [describe command](cmd/describe.md) | 2.1 | stable (since 2.1) | Query the metadata of an index. | -| [explain command](cmd/explain.md) | 3.1 | stable (since 3.1) | Explain the plan of query. | -| [show datasources command](cmd/showdatasources.md) | 2.4 | stable (since 2.4) | Query datasources configured in the PPL engine. 
| -| [addtotals command](cmd/addtotals.md) | 3.5 | stable (since 3.5) | Adds row and column values and appends a totals column and row. | -| [addcoltotals command](cmd/addcoltotals.md) | 3.5 | stable (since 3.5) | Adds column values and appends a totals row. | -| [transpose command](cmd/transpose.md) | 3.5 | stable (since 3.5) | Transpose rows to columns.| - +| Command Name | Version Introduced | Current Status | Command Description | +| --- | --- | --- | --- | +| [search command](cmd/search.md) | 1.0 | stable (since 1.0) | Retrieve documents from the index. | +| [where command](cmd/where.md) | 1.0 | stable (since 1.0) | Filter the search result using boolean expressions. | +| [subquery command](cmd/subquery.md) | 3.0 | experimental (since 3.0) | Embed one PPL query inside another for complex filtering and data retrieval operations. | +| [fields command](cmd/fields.md) | 1.0 | stable (since 1.0) | Keep or remove fields from the search result. | +| [rename command](cmd/rename.md) | 1.0 | stable (since 1.0) | Rename one or more fields in the search result. | +| [eval command](cmd/eval.md) | 1.0 | stable (since 1.0) | Evaluate an expression and append the result to the search result. | +| [replace command](cmd/replace.md) | 3.4 | experimental (since 3.4) | Replace text in one or more fields in the search result | +| [fillnull command](cmd/fillnull.md) | 3.0 | experimental (since 3.0) | Fill null with provided value in one or more fields in the search result. | +| [expand command](cmd/expand.md) | 3.1 | experimental (since 3.1) | Transform a single document into multiple documents by expanding a nested array field. | +| [flatten command](cmd/flatten.md) | 3.1 | experimental (since 3.1) | Flatten a struct or an object field into separate fields in a document. | +| [table command](cmd/table.md) | 3.3 | experimental (since 3.3) | Keep or remove fields from the search result using enhanced syntax options. | +| [stats command](cmd/stats.md) | 1.0 | stable (since 1.0) | Calculate aggregation from search results. | +| [eventstats command](cmd/eventstats.md) | 3.1 | experimental (since 3.1) | Calculate aggregation statistics and add them as new fields to each event. | +| [streamstats command](cmd/streamstats.md) | 3.4 | experimental (since 3.4) | Calculate cumulative or rolling statistics as events are processed in order. | +| [bin command](cmd/bin.md) | 3.3 | experimental (since 3.3) | Group numeric values into buckets of equal intervals. | +| [timechart command](cmd/timechart.md) | 3.3 | experimental (since 3.3) | Create time-based charts and visualizations. | +| [chart command](cmd/chart.md) | 3.4 | experimental (since 3.4) | Apply statistical aggregations to search results and group the data for visualizations. | +| [trendline command](cmd/trendline.md) | 3.0 | experimental (since 3.0) | Calculate moving averages of fields. | +| [sort command](cmd/sort.md) | 1.0 | stable (since 1.0) | Sort all the search results by the specified fields. | +| [reverse command](cmd/reverse.md) | 3.2 | experimental (since 3.2) | Reverse the display order of search results. | +| [head command](cmd/head.md) | 1.0 | stable (since 1.0) | Return the first N number of specified results after an optional offset in search order. | +| [dedup command](cmd/dedup.md) | 1.0 | stable (since 1.0) | Remove identical documents defined by the field from the search result. | +| [top command](cmd/top.md) | 1.0 | stable (since 1.0) | Find the most common tuple of values of all fields in the field list. 
| +| [rare command](cmd/rare.md) | 1.0 | stable (since 1.0) | Find the least common tuple of values of all fields in the field list. | +| [parse command](cmd/parse.md) | 1.3 | stable (since 1.3) | Parse a text field with a regular expression and append the result to the search result. | +| [grok command](cmd/grok.md) | 2.4 | stable (since 2.4) | Parse a text field with a grok pattern and append the results to the search result. | +| [rex command](cmd/rex.md) | 3.3 | experimental (since 3.3) | Extract fields from a raw text field using regular expression named capture groups. | +| [regex command](cmd/regex.md) | 3.3 | experimental (since 3.3) | Filter search results by matching field values against a regular expression pattern. | +| [spath command](cmd/spath.md) | 3.3 | experimental (since 3.3) | Extract fields from structured text data. | +| [patterns command](cmd/patterns.md) | 2.4 | stable (since 2.4) | Extract log patterns from a text field and append the results to the search result. | +| [join command](cmd/join.md) | 3.0 | stable (since 3.0) | Combine two datasets together. | +| [append command](cmd/append.md) | 3.3 | experimental (since 3.3) | Append the result of a sub-search to the bottom of the input search results. | +| [appendcol command](cmd/appendcol.md) | 3.1 | experimental (since 3.1) | Append the result of a sub-search and attach it alongside the input search results. | +| [lookup command](cmd/lookup.md) | 3.0 | experimental (since 3.0) | Add or replace data from a lookup index. | +| [multisearch command](cmd/multisearch.md) | 3.4 | experimental (since 3.4) | Execute multiple search queries and combine their results. | +| [ml command](cmd/ml.md) | 2.5 | stable (since 2.5) | Apply machine learning algorithms to analyze data. | +| [kmeans command](cmd/kmeans.md) | 1.3 | stable (since 1.3) | Apply the kmeans algorithm on the search result returned by a PPL command. | +| [ad command](cmd/ad.md) | 1.3 | deprecated (since 2.5) | Apply Random Cut Forest algorithm on the search result returned by a PPL command. | +| [describe command](cmd/describe.md) | 2.1 | stable (since 2.1) | Query the metadata of an index. | +| [explain command](cmd/explain.md) | 3.1 | stable (since 3.1) | Explain the plan of query. | +| [show datasources command](cmd/showdatasources.md) | 2.4 | stable (since 2.4) | Query datasources configured in the PPL engine. | +| [addtotals command](cmd/addtotals.md) | 3.5 | stable (since 3.5) | Adds row and column values and appends a totals column and row. | +| [addcoltotals command](cmd/addcoltotals.md) | 3.5 | stable (since 3.5) | Adds column values and appends a totals row. | +| [transpose command](cmd/transpose.md) | 3.5 | stable (since 3.5) | Transpose rows to columns. 
|
 
 - [Syntax](cmd/syntax.md) - PPL query structure and command syntax formatting
 * **Functions**
   - [Aggregation Functions](functions/aggregations.md)

From 616f536fd6e591c74a68e6f695ae396245c605c8 Mon Sep 17 00:00:00 2001
From: Asif Bashar
Date: Fri, 2 Jan 2026 16:05:02 -0800
Subject: [PATCH 15/24] doc format

Signed-off-by: Asif Bashar
---
 .../opensearch/sql/ast/tree/Transpose.java    |  6 ++++
 docs/user/ppl/cmd/transpose.md                | 34 +++++++++----------
 docs/user/ppl/index.md                        |  2 +-
 3 files changed, 24 insertions(+), 18 deletions(-)

diff --git a/core/src/main/java/org/opensearch/sql/ast/tree/Transpose.java b/core/src/main/java/org/opensearch/sql/ast/tree/Transpose.java
index 344787a37c1..b4a9036eb26 100644
--- a/core/src/main/java/org/opensearch/sql/ast/tree/Transpose.java
+++ b/core/src/main/java/org/opensearch/sql/ast/tree/Transpose.java
@@ -10,6 +10,7 @@
 import lombok.*;
 import org.opensearch.sql.ast.AbstractNodeVisitor;
 import org.opensearch.sql.ast.expression.Argument;
+import org.opensearch.sql.common.utils.StringUtils;
 
 /** AST node representing the Transpose operation. */
 @Getter
@@ -20,6 +21,7 @@ public class Transpose extends UnresolvedPlan {
 
   private final @NonNull java.util.Map arguments;
   private UnresolvedPlan child;
+  private static final int max_limit = 1000;
 
   public Integer getMaxRows() {
     Integer maxRows = 5;
@@ -31,6 +33,10 @@ public Integer getMaxRows() {
         maxRows = 5;
       }
     }
+    if (maxRows > max_limit) {
+      throw new IllegalArgumentException(
+          StringUtils.format("Maximum limit to transpose is %s", max_limit));
+    }
     return maxRows;
   }
 
diff --git a/docs/user/ppl/cmd/transpose.md b/docs/user/ppl/cmd/transpose.md
index 7d0e6a2a95f..e177254ed12 100644
--- a/docs/user/ppl/cmd/transpose.md
+++ b/docs/user/ppl/cmd/transpose.md
@@ -1,30 +1,30 @@
-# transpose 
+# transpose
 
-## Description 
+## Description
 The `transpose` command outputs the requested number of rows as columns, effectively transposing each result row into a corresponding column of field values.
 
-## Syntax 
+## Syntax
 
 transpose [int] [column_name=]
 
-* number-of-rows: optional. The number of rows to transform into columns. 
-* column_name: optional. The name of the first column to use when transposing rows. This column holds the field names. 
-
-
-## Example 1: Transpose results 
+* number-of-rows: optional. The number of rows to transform into columns. Default value is 5. Maximum allowed is 1000.
+* column_name: optional. The name of the first column to use when transposing rows. This column holds the field names.
+
+
+## Example 1: Transpose results
 This example shows transposing without any parameters. It transforms 5 rows into columns, since the default is 5.
- 
+
 ```ppl
 source=accounts
 | head 5
 | fields account_number, firstname, lastname, balance
 | transpose
 ```
- 
+
 Expected output:
- 
+
 ```text
 fetched rows / total rows = 4/4
 +----------------+-------+--------+---------+-------+-------+
 | column         | row 1 | row 2  | row 3   | row 4 | row 5 |
 |----------------|-------|--------|---------|-------|-------|
 | account_number | 1     | 6      | 13      | 18    | null  |
 | balance        | 39225 | 5686   | 32838   | 4180  | null  |
 | firstname      | Amber | Hattie | Nanette | Dale  | null  |
 | lastname       | Duke  | Bond   | Bates   | Adams | null  |
 +----------------+-------+--------+---------+-------+-------+
 ```
- 
-## Example 2: Transpose results up to a provided number of rows. 
+
+## Example 2: Transpose results up to a provided number of rows.
 This example shows transposing with an explicit row count. It transforms 4 rows into columns because the count is set to 4.
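(Editorial aside: the two optional arguments documented above funnel through `getMaxRows()` and `getColumnName()` on the `Transpose` AST node shown in this patch. The sketch below restates that defaulting and validation logic as a standalone helper so the rules are easy to see in one place: a default of 5 rows, a hard cap of 1000 at this point in the series (a later patch raises it to 10000), and `column` as the fallback header. The `TransposeArgs` class name and the plain `Map<String, Object>` argument shape are simplifications for illustration, not the committed `Argument`/`Literal` API.)

```java
// Editorial sketch of the argument handling for `transpose [int] [column_name=...]`.
// Hypothetical helper; the real code lives on the Transpose AST node.
import java.util.Map;

final class TransposeArgs {
  private static final int MAX_LIMIT = 1000; // value in this patch; raised to 10000 later

  /** Number of rows to turn into columns: the explicit value if parseable, else the default 5. */
  static int maxRows(Map<String, Object> arguments) {
    int maxRows = 5;
    Object number = arguments.get("number");
    if (number != null) {
      try {
        maxRows = Integer.parseInt(number.toString());
      } catch (NumberFormatException e) {
        maxRows = 5; // fall back to the default on a malformed value
      }
    }
    if (maxRows > MAX_LIMIT) {
      throw new IllegalArgumentException("Maximum limit to transpose is " + MAX_LIMIT);
    }
    return maxRows;
  }

  /** Header of the first output column, which holds the original field names. */
  static String columnName(Map<String, Object> arguments) {
    Object name = arguments.get("columnName");
    return name != null ? name.toString() : "column";
  }
}
```

For instance, `TransposeArgs.maxRows(Map.of("number", "4"))` yields 4, while an unparseable value silently falls back to 5, matching the behavior the examples above rely on.
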
@@ -47,9 +47,9 @@ source=accounts | fields account_number, firstname, lastname, balance | transpose 4 ``` - + Expected output: - + ```text fetched rows / total rows = 4/4 +----------------+-------+--------+---------+-------+ @@ -86,7 +86,7 @@ fetched rows / total rows = 4/4 | lastname | Duke | Bond | Bates | Adams | +----------------+-------+--------+---------+-------+ ``` - -## Limitations + +## Limitations The `transpose` command transforms up to a number of rows specified and if not enough rows found, it shows those transposed rows as null columns. \ No newline at end of file diff --git a/docs/user/ppl/index.md b/docs/user/ppl/index.md index 05b8ea125cc..6728ff5be77 100644 --- a/docs/user/ppl/index.md +++ b/docs/user/ppl/index.md @@ -11,7 +11,7 @@ The query start with search command and then flowing a set of command delimited for example, the following query retrieve firstname and lastname from accounts if age large than 18. -``` +```ppl ignore source=accounts | where age > 18 | fields firstname, lastname From 46d6b0b50d63537824391f488894834ac8819e2b Mon Sep 17 00:00:00 2001 From: Asif Bashar Date: Fri, 2 Jan 2026 16:55:28 -0800 Subject: [PATCH 16/24] coderabbit review fixes Signed-off-by: Asif Bashar --- core/src/main/java/org/opensearch/sql/ast/tree/Transpose.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/core/src/main/java/org/opensearch/sql/ast/tree/Transpose.java b/core/src/main/java/org/opensearch/sql/ast/tree/Transpose.java index b4a9036eb26..221eb73e9de 100644 --- a/core/src/main/java/org/opensearch/sql/ast/tree/Transpose.java +++ b/core/src/main/java/org/opensearch/sql/ast/tree/Transpose.java @@ -25,7 +25,7 @@ public class Transpose extends UnresolvedPlan { public Integer getMaxRows() { Integer maxRows = 5; - if (arguments.containsKey("number")) { + if (arguments.containsKey("number") && arguments.get("number").getValue() != null) { try { maxRows = Integer.parseInt(arguments.get("number").getValue().toString()); } catch (NumberFormatException e) { @@ -42,7 +42,7 @@ public Integer getMaxRows() { public String getColumnName() { String columnName = "column"; - if (arguments.containsKey("columnName")) { + if (arguments.containsKey("columnName") && arguments.get("columnName").getValue() != null) { columnName = arguments.get("columnName").getValue().toString(); } return columnName; From 53919e7a634acc8a80d02e7daa52f92917908743 Mon Sep 17 00:00:00 2001 From: Asif Bashar Date: Mon, 12 Jan 2026 23:47:41 -0800 Subject: [PATCH 17/24] added recommended changes Signed-off-by: Asif Bashar --- .../opensearch/sql/ast/tree/Transpose.java | 6 +- .../sql/calcite/CalciteRelNodeVisitor.java | 156 ++++++------------ .../sql/calcite/utils/PlanUtils.java | 1 + ppl/src/main/antlr/OpenSearchPPLParser.g4 | 2 +- 4 files changed, 59 insertions(+), 106 deletions(-) diff --git a/core/src/main/java/org/opensearch/sql/ast/tree/Transpose.java b/core/src/main/java/org/opensearch/sql/ast/tree/Transpose.java index 221eb73e9de..2705853088b 100644 --- a/core/src/main/java/org/opensearch/sql/ast/tree/Transpose.java +++ b/core/src/main/java/org/opensearch/sql/ast/tree/Transpose.java @@ -21,7 +21,7 @@ public class Transpose extends UnresolvedPlan { private final @NonNull java.util.Map arguments; private UnresolvedPlan child; - private static final int max_limit = 1000; + private static final int MAX_LIMIT_TRANSPOSE = 10000; public Integer getMaxRows() { Integer maxRows = 5; @@ -33,9 +33,9 @@ public Integer getMaxRows() { maxRows = 5; } } - if (maxRows > max_limit) { + if (maxRows > 
MAX_LIMIT_TRANSPOSE) { throw new IllegalArgumentException( - StringUtils.format("Maximum limit to transpose is %s", max_limit)); + StringUtils.format("Maximum limit to transpose is %s", MAX_LIMIT_TRANSPOSE)); } return maxRows; } diff --git a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java index 736dad91114..ae3a4cf4fe1 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java +++ b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java @@ -55,6 +55,7 @@ import org.apache.calcite.rel.type.RelDataTypeFamily; import org.apache.calcite.rel.type.RelDataTypeField; import org.apache.calcite.rex.RexCall; +import org.apache.calcite.rex.RexBuilder; import org.apache.calcite.rex.RexCorrelVariable; import org.apache.calcite.rex.RexInputRef; import org.apache.calcite.rex.RexLiteral; @@ -697,110 +698,61 @@ public RelNode visitReverse( return context.relBuilder.peek(); } - @Override - public RelNode visitTranspose( - org.opensearch.sql.ast.tree.Transpose node, CalcitePlanContext context) { - visitChildren(node, context); - Integer maxRows = node.getMaxRows(); - if (maxRows == null || maxRows <= 0) { - throw new IllegalArgumentException("maxRows must be a positive integer"); - } - String columnName = node.getColumnName(); - // Get the current schema to transpose - RelNode currentNode = context.relBuilder.peek(); - List fieldNames = currentNode.getRowType().getFieldNames(); - List fields = currentNode.getRowType().getFieldList(); - if (fieldNames.isEmpty()) { - return currentNode; - } - - // Add row numbers to identify each row uniquely - RexNode rowNumber = - context - .relBuilder - .aggregateCall(SqlStdOperatorTable.ROW_NUMBER) - .over() - .rowsTo(RexWindowBounds.CURRENT_ROW) - .as("__row_id__"); - context.relBuilder.projectPlus(rowNumber); - - // Unpivot the data - convert columns to rows - // Each field becomes a row with: row_id, column, value - List measureColumns = ImmutableList.of("value"); - List axisColumns = ImmutableList.of(columnName); - - // Create the unpivot value mappings - List, List>> valueMappings = new ArrayList<>(); - RelDataType varcharType = - context.rexBuilder.getTypeFactory().createSqlType(SqlTypeName.VARCHAR); - - for (String fieldName : fieldNames) { - if (fieldName.equals("__row_id__")) { - continue; // Skip the row number column - } - - // Create the axis value (column name as literal) - RexLiteral columnNameLiteral = context.rexBuilder.makeLiteral(fieldName); - List axisValues = ImmutableList.of(columnNameLiteral); - - // Create the measure value (field expression cast to VARCHAR) - RexNode fieldValue = context.relBuilder.field(fieldName); - RexNode castValue = context.rexBuilder.makeCast(varcharType, fieldValue, true); - List measureValues = ImmutableList.of(castValue); - - // Create the mapping entry - valueMappings.add(new AbstractMap.SimpleEntry<>(axisValues, measureValues)); - } - - // Apply the unpivot operation - context.relBuilder.unpivot( - false, // includeNulls = false - measureColumns, // measure column names: ["value"] - axisColumns, // axis column names: ["column"] - valueMappings // field mappings - ); - - // Pivot the data to transpose rows as columns - // Pivot on __row_id__ with column as the grouping key - // This creates: column, row1, row2, row3, ... 
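
(Editorial aside: the removed comment above spells out the target shape of this operator. Before reading the replacement implementation that follows, it may help to see the same row-to-column flip restated in plain Java: each input column becomes one output row whose first cell is the field name, followed by up to `maxRows` stringified values, padded with nulls when the input has fewer rows. This is a sketch of the intended output over simplified in-memory types, not the engine code, which builds the equivalent relational plan with ROW_NUMBER, unpivot, and pivot.)

```java
// Editorial sketch: the transpose semantics over in-memory rows. Hypothetical
// helper for illustration only; not part of this patch.
import java.util.ArrayList;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;

final class TransposeSketch {
  static List<Map<String, String>> transpose(
      List<Map<String, Object>> rows, List<String> fieldNames, int maxRows, String columnName) {
    List<Map<String, String>> out = new ArrayList<>();
    for (String field : fieldNames) {
      Map<String, String> outRow = new LinkedHashMap<>();
      outRow.put(columnName, field); // first cell holds the original field name
      for (int i = 1; i <= maxRows; i++) {
        // values are stringified, mirroring the CAST-to-VARCHAR in the plan;
        // positions past the end of the input become nulls
        Object v = i <= rows.size() ? rows.get(i - 1).get(field) : null;
        outRow.put("row " + i, v == null ? null : v.toString());
      }
      out.add(outRow);
    }
    return out;
  }
}
```

Transposing four account rows with `maxRows = 5` reproduces the `row 1`..`row 5` layout from the transpose.md examples, including the trailing all-null column.
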
- - // Create conditional aggregations for each row position - // We'll use ROW_NUMBER to determine the row positions dynamically - RexNode rowPos = - context - .relBuilder - .aggregateCall(SqlStdOperatorTable.ROW_NUMBER) - .over() - .partitionBy(context.relBuilder.field(columnName)) - .orderBy(context.relBuilder.field("__row_id__")) - .rowsTo(RexWindowBounds.CURRENT_ROW) - .as("__row_pos__"); - context.relBuilder.projectPlus(rowPos); - - // Create aggregation calls for each possible row position - List pivotAggCalls = new ArrayList<>(); - - for (int i = 1; i <= maxRows; i++) { - // Create CASE WHEN __row_id__ = i THEN value END for each row position - RexNode caseExpr = - context.relBuilder.call( - SqlStdOperatorTable.CASE, - context.relBuilder.equals( - context.relBuilder.field("__row_id__"), context.relBuilder.literal(i)), - context.relBuilder.field("value"), - context.relBuilder.literal(null)); + @Override + public RelNode visitTranspose( + org.opensearch.sql.ast.tree.Transpose node, CalcitePlanContext context) { + + visitChildren(node, context); + + int maxRows = Optional.ofNullable(node.getMaxRows()) + .filter(r -> r > 0) + .orElseThrow(() -> new IllegalArgumentException("maxRows must be positive")); + + String columnName = node.getColumnName(); + List fieldNames = context.relBuilder.peek().getRowType().getFieldNames(); + + + RelBuilder b = context.relBuilder; + RexBuilder rx = context.rexBuilder; + RelDataType varchar = rx.getTypeFactory().createSqlType(SqlTypeName.VARCHAR); + + // Step 1: ROW_NUMBER + b.projectPlus( + b.aggregateCall(SqlStdOperatorTable.ROW_NUMBER) + .over() + .rowsTo(RexWindowBounds.CURRENT_ROW) + .as(PlanUtils.ROW_NUMBER_COLUMN_FOR_TRANSPOSE)); + + // Step 2: UNPIVOT + b.unpivot( + false, + ImmutableList.of("value"), + ImmutableList.of(columnName), + fieldNames.stream() + .map(f -> Map.entry( + ImmutableList.of(rx.makeLiteral(f)), + ImmutableList.of((RexNode) rx.makeCast(varchar, b.field(f), true)))) + .collect(Collectors.toList())); + + // Step 3: PIVOT + b.pivot( + b.groupKey(b.field(columnName)), + ImmutableList.of(b.max(b.field("value"))), + ImmutableList.of(b.field(PlanUtils.ROW_NUMBER_COLUMN_FOR_TRANSPOSE)), + IntStream.rangeClosed(1, maxRows) + .mapToObj(i -> Map.entry("row " + i, ImmutableList.of((RexNode) b.literal(i)))) + .collect(Collectors.toList())); + + // Step 4: RENAME + List cleanNames = new ArrayList<>(); + cleanNames.add(columnName); + for (int i = 1; i <= maxRows; i++) { + cleanNames.add("row " + i); + } + b.rename(cleanNames); - AggCall maxCase = context.relBuilder.max(caseExpr).as("row " + i); - pivotAggCalls.add(maxCase); + return b.peek(); } - - // Group by column and apply the conditional aggregations - context.relBuilder.aggregate( - context.relBuilder.groupKey(context.relBuilder.field(columnName)), pivotAggCalls); - - return context.relBuilder.peek(); - } - @Override public RelNode visitBin(Bin node, CalcitePlanContext context) { visitChildren(node, context); diff --git a/core/src/main/java/org/opensearch/sql/calcite/utils/PlanUtils.java b/core/src/main/java/org/opensearch/sql/calcite/utils/PlanUtils.java index d89c36601ef..11c0989900e 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/utils/PlanUtils.java +++ b/core/src/main/java/org/opensearch/sql/calcite/utils/PlanUtils.java @@ -84,6 +84,7 @@ public interface PlanUtils { String ROW_NUMBER_COLUMN_FOR_SUBSEARCH = "_row_number_subsearch_"; String ROW_NUMBER_COLUMN_FOR_STREAMSTATS = "__stream_seq__"; String ROW_NUMBER_COLUMN_FOR_CHART = "_row_number_chart_"; + String 
ROW_NUMBER_COLUMN_FOR_TRANSPOSE = "_row_number_transpose_"; static SpanUnit intervalUnitToSpanUnit(IntervalUnit unit) { return switch (unit) { diff --git a/ppl/src/main/antlr/OpenSearchPPLParser.g4 b/ppl/src/main/antlr/OpenSearchPPLParser.g4 index 03b0f71e148..4edb0344e57 100644 --- a/ppl/src/main/antlr/OpenSearchPPLParser.g4 +++ b/ppl/src/main/antlr/OpenSearchPPLParser.g4 @@ -132,6 +132,7 @@ commandName | REX | APPENDPIPE | REPLACE + | TRANSPOSE ; searchCommand @@ -1668,7 +1669,6 @@ searchableKeyWord | FIELDNAME | ROW | COL - | TRANSPOSE | COLUMN_NAME ; From 7ed1bc1d063849470570e107914bc59256fe71e4 Mon Sep 17 00:00:00 2001 From: Asif Bashar Date: Mon, 12 Jan 2026 23:54:18 -0800 Subject: [PATCH 18/24] added recommended changes Signed-off-by: Asif Bashar --- docs/user/ppl/cmd/transpose.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/user/ppl/cmd/transpose.md b/docs/user/ppl/cmd/transpose.md index e177254ed12..442d4b9716d 100644 --- a/docs/user/ppl/cmd/transpose.md +++ b/docs/user/ppl/cmd/transpose.md @@ -8,7 +8,7 @@ The `transpose` command outputs the requested number of rows as columns, effecti transpose [int] [column_name=] -* number-of-rows: optional. The number of rows to transform into columns. Default value is 5. Maximum allowed is 1000. +* number-of-rows: optional. The number of rows to transform into columns. Default value is 5. Maximum allowed is 10000. * column_name: optional. The name of the first column to use when transposing rows. This column holds the field names. @@ -89,4 +89,4 @@ fetched rows / total rows = 4/4 ## Limitations -The `transpose` command transforms up to a number of rows specified and if not enough rows found, it shows those transposed rows as null columns. \ No newline at end of file +The `transpose` command transforms up to a number of rows specified and if not enough rows found, it shows those transposed rows as null columns. 
\ No newline at end of file From 253e95e50b224fba14d216aba2d59a1f1f0d6ccb Mon Sep 17 00:00:00 2001 From: Asif Bashar Date: Tue, 13 Jan 2026 00:03:22 -0800 Subject: [PATCH 19/24] for cross cluster failure debugging Signed-off-by: Asif Bashar --- .../java/org/opensearch/sql/security/CrossClusterSearchIT.java | 1 + 1 file changed, 1 insertion(+) diff --git a/integ-test/src/test/java/org/opensearch/sql/security/CrossClusterSearchIT.java b/integ-test/src/test/java/org/opensearch/sql/security/CrossClusterSearchIT.java index 6bd34297b9b..6607f08245c 100644 --- a/integ-test/src/test/java/org/opensearch/sql/security/CrossClusterSearchIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/security/CrossClusterSearchIT.java @@ -282,6 +282,7 @@ public void testCrossClusterTranspose() throws IOException { + " firstname='Dale'|sort firstname desc |fields firstname,age,balance |" + " transpose 3 column_name='column_names'", TEST_INDEX_BANK_REMOTE)); + System.out.println(result.toString()); verifyDataRows( result, rows("firstname", "Nanette", "Hattie", "Dale"), From ae653475755bf50422951f4cb34cfb43de9245b0 Mon Sep 17 00:00:00 2001 From: Asif Bashar Date: Tue, 13 Jan 2026 00:09:30 -0800 Subject: [PATCH 20/24] for cross cluster failure debugging Signed-off-by: Asif Bashar --- .../org/opensearch/sql/security/CrossClusterSearchIT.java | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/integ-test/src/test/java/org/opensearch/sql/security/CrossClusterSearchIT.java b/integ-test/src/test/java/org/opensearch/sql/security/CrossClusterSearchIT.java index 6607f08245c..0688c4b421b 100644 --- a/integ-test/src/test/java/org/opensearch/sql/security/CrossClusterSearchIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/security/CrossClusterSearchIT.java @@ -282,12 +282,12 @@ public void testCrossClusterTranspose() throws IOException { + " firstname='Dale'|sort firstname desc |fields firstname,age,balance |" + " transpose 3 column_name='column_names'", TEST_INDEX_BANK_REMOTE)); - System.out.println(result.toString()); + verifyDataRows( result, rows("firstname", "Nanette", "Hattie", "Dale"), - rows("balance ", "32838", "5686", "4180"), - rows("age ", "28", "36", "33")); + rows("balance", "32838", "5686", "4180"), + rows("age", "28", "36", "33")); } @Test From a5c007fc45bdf4bff11a1fde374e582647686c76 Mon Sep 17 00:00:00 2001 From: Asif Bashar Date: Tue, 13 Jan 2026 00:27:42 -0800 Subject: [PATCH 21/24] for cross cluster failure debugging Signed-off-by: Asif Bashar --- .../sql/calcite/CalciteRelNodeVisitor.java | 112 +++++++++--------- .../sql/security/CrossClusterSearchIT.java | 2 +- 2 files changed, 58 insertions(+), 56 deletions(-) diff --git a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java index ae3a4cf4fe1..679e08b98a7 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java +++ b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java @@ -29,7 +29,6 @@ import com.google.common.collect.ImmutableList; import com.google.common.collect.Iterables; import com.google.common.collect.Streams; -import java.util.AbstractMap; import java.util.ArrayList; import java.util.Arrays; import java.util.BitSet; @@ -54,8 +53,8 @@ import org.apache.calcite.rel.type.RelDataType; import org.apache.calcite.rel.type.RelDataTypeFamily; import org.apache.calcite.rel.type.RelDataTypeField; -import org.apache.calcite.rex.RexCall; import org.apache.calcite.rex.RexBuilder; 
+import org.apache.calcite.rex.RexCall; import org.apache.calcite.rex.RexCorrelVariable; import org.apache.calcite.rex.RexInputRef; import org.apache.calcite.rex.RexLiteral; @@ -698,61 +697,64 @@ public RelNode visitReverse( return context.relBuilder.peek(); } - @Override - public RelNode visitTranspose( - org.opensearch.sql.ast.tree.Transpose node, CalcitePlanContext context) { - - visitChildren(node, context); - - int maxRows = Optional.ofNullable(node.getMaxRows()) - .filter(r -> r > 0) - .orElseThrow(() -> new IllegalArgumentException("maxRows must be positive")); - - String columnName = node.getColumnName(); - List fieldNames = context.relBuilder.peek().getRowType().getFieldNames(); - - - RelBuilder b = context.relBuilder; - RexBuilder rx = context.rexBuilder; - RelDataType varchar = rx.getTypeFactory().createSqlType(SqlTypeName.VARCHAR); - - // Step 1: ROW_NUMBER - b.projectPlus( - b.aggregateCall(SqlStdOperatorTable.ROW_NUMBER) - .over() - .rowsTo(RexWindowBounds.CURRENT_ROW) - .as(PlanUtils.ROW_NUMBER_COLUMN_FOR_TRANSPOSE)); - - // Step 2: UNPIVOT - b.unpivot( - false, - ImmutableList.of("value"), - ImmutableList.of(columnName), - fieldNames.stream() - .map(f -> Map.entry( - ImmutableList.of(rx.makeLiteral(f)), - ImmutableList.of((RexNode) rx.makeCast(varchar, b.field(f), true)))) - .collect(Collectors.toList())); - - // Step 3: PIVOT - b.pivot( - b.groupKey(b.field(columnName)), - ImmutableList.of(b.max(b.field("value"))), - ImmutableList.of(b.field(PlanUtils.ROW_NUMBER_COLUMN_FOR_TRANSPOSE)), - IntStream.rangeClosed(1, maxRows) - .mapToObj(i -> Map.entry("row " + i, ImmutableList.of((RexNode) b.literal(i)))) - .collect(Collectors.toList())); - - // Step 4: RENAME - List cleanNames = new ArrayList<>(); - cleanNames.add(columnName); - for (int i = 1; i <= maxRows; i++) { - cleanNames.add("row " + i); - } - b.rename(cleanNames); + @Override + public RelNode visitTranspose( + org.opensearch.sql.ast.tree.Transpose node, CalcitePlanContext context) { + + visitChildren(node, context); + + int maxRows = + Optional.ofNullable(node.getMaxRows()) + .filter(r -> r > 0) + .orElseThrow(() -> new IllegalArgumentException("maxRows must be positive")); + + String columnName = node.getColumnName(); + List fieldNames = context.relBuilder.peek().getRowType().getFieldNames(); - return b.peek(); + RelBuilder b = context.relBuilder; + RexBuilder rx = context.rexBuilder; + RelDataType varchar = rx.getTypeFactory().createSqlType(SqlTypeName.VARCHAR); + + // Step 1: ROW_NUMBER + b.projectPlus( + b.aggregateCall(SqlStdOperatorTable.ROW_NUMBER) + .over() + .rowsTo(RexWindowBounds.CURRENT_ROW) + .as(PlanUtils.ROW_NUMBER_COLUMN_FOR_TRANSPOSE)); + + // Step 2: UNPIVOT + b.unpivot( + false, + ImmutableList.of("value"), + ImmutableList.of(columnName), + fieldNames.stream() + .map( + f -> + Map.entry( + ImmutableList.of(rx.makeLiteral(f)), + ImmutableList.of((RexNode) rx.makeCast(varchar, b.field(f), true)))) + .collect(Collectors.toList())); + + // Step 3: PIVOT + b.pivot( + b.groupKey(b.field(columnName)), + ImmutableList.of(b.max(b.field("value"))), + ImmutableList.of(b.field(PlanUtils.ROW_NUMBER_COLUMN_FOR_TRANSPOSE)), + IntStream.rangeClosed(1, maxRows) + .mapToObj(i -> Map.entry("row " + i, ImmutableList.of((RexNode) b.literal(i)))) + .collect(Collectors.toList())); + + // Step 4: RENAME + List cleanNames = new ArrayList<>(); + cleanNames.add(columnName); + for (int i = 1; i <= maxRows; i++) { + cleanNames.add("row " + i); } + b.rename(cleanNames); + + return b.peek(); + } + @Override public RelNode 
visitBin(Bin node, CalcitePlanContext context) { visitChildren(node, context); diff --git a/integ-test/src/test/java/org/opensearch/sql/security/CrossClusterSearchIT.java b/integ-test/src/test/java/org/opensearch/sql/security/CrossClusterSearchIT.java index 0688c4b421b..9e1ffb0bf07 100644 --- a/integ-test/src/test/java/org/opensearch/sql/security/CrossClusterSearchIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/security/CrossClusterSearchIT.java @@ -282,7 +282,7 @@ public void testCrossClusterTranspose() throws IOException { + " firstname='Dale'|sort firstname desc |fields firstname,age,balance |" + " transpose 3 column_name='column_names'", TEST_INDEX_BANK_REMOTE)); - + verifyDataRows( result, rows("firstname", "Nanette", "Hattie", "Dale"), From 484ec6733c4defaca741a62e385ee4b4ab0f80f4 Mon Sep 17 00:00:00 2001 From: Asif Bashar Date: Tue, 13 Jan 2026 13:35:52 -0800 Subject: [PATCH 22/24] testing with no trim Signed-off-by: Asif Bashar --- .../calcite/explain_transpose.yaml | 23 +- .../ppl/calcite/CalcitePPLTransposeTest.java | 214 ++++++++++-------- 2 files changed, 126 insertions(+), 111 deletions(-) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_transpose.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_transpose.yaml index 98a833a4bd5..a30b4b44aa5 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_transpose.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_transpose.yaml @@ -1,19 +1,20 @@ calcite: logical: | LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) - LogicalAggregate(group=[{0}], row 1=[MAX($1)], row 2=[MAX($2)], row 3=[MAX($3)], row 4=[MAX($4)]) - LogicalProject(column_names=[$18], $f21=[CASE(=($17, 1), CAST($19):VARCHAR NOT NULL, null:NULL)], $f22=[CASE(=($17, 2), CAST($19):VARCHAR NOT NULL, null:NULL)], $f23=[CASE(=($17, 3), CAST($19):VARCHAR NOT NULL, null:NULL)], $f24=[CASE(=($17, 4), CAST($19):VARCHAR NOT NULL, null:NULL)]) - LogicalFilter(condition=[IS NOT NULL($19)]) - LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], _id=[$11], _index=[$12], _score=[$13], _maxscore=[$14], _sort=[$15], _routing=[$16], __row_id__=[$17], column_names=[$18], value=[CASE(=($18, 'account_number'), CAST($0):VARCHAR NOT NULL, =($18, 'firstname'), CAST($1):VARCHAR NOT NULL, =($18, 'address'), CAST($2):VARCHAR NOT NULL, =($18, 'balance'), CAST($3):VARCHAR NOT NULL, =($18, 'gender'), CAST($4):VARCHAR NOT NULL, =($18, 'city'), CAST($5):VARCHAR NOT NULL, =($18, 'employer'), CAST($6):VARCHAR NOT NULL, =($18, 'state'), CAST($7):VARCHAR NOT NULL, =($18, 'age'), CAST($8):VARCHAR NOT NULL, =($18, 'email'), CAST($9):VARCHAR NOT NULL, =($18, 'lastname'), CAST($10):VARCHAR NOT NULL, =($18, '_id'), CAST($11):VARCHAR NOT NULL, =($18, '_index'), CAST($12):VARCHAR NOT NULL, =($18, '_score'), NUMBER_TO_STRING($13), =($18, '_maxscore'), NUMBER_TO_STRING($14), =($18, '_sort'), CAST($15):VARCHAR NOT NULL, =($18, '_routing'), CAST($16):VARCHAR NOT NULL, null:NULL)]) - LogicalJoin(condition=[true], joinType=[inner]) - LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], _id=[$11], _index=[$12], _score=[$13], _maxscore=[$14], _sort=[$15], _routing=[$16], __row_id__=[ROW_NUMBER() OVER ()]) - LogicalSort(fetch=[5]) - CalciteLogicalIndexScan(table=[[OpenSearch, 
opensearch-sql_test_index_account]]) - LogicalValues(tuples=[[{ 'account_number' }, { 'firstname' }, { 'address' }, { 'balance' }, { 'gender' }, { 'city' }, { 'employer' }, { 'state' }, { 'age' }, { 'email' }, { 'lastname' }, { '_id' }, { '_index' }, { '_score' }, { '_maxscore' }, { '_sort' }, { '_routing' }]]) + LogicalProject(column_names=[$0], row 1=[$1], row 2=[$2], row 3=[$3], row 4=[$4]) + LogicalAggregate(group=[{0}], row 1_null=[MAX($1) FILTER $2], row 2_null=[MAX($1) FILTER $3], row 3_null=[MAX($1) FILTER $4], row 4_null=[MAX($1) FILTER $5]) + LogicalProject(column_names=[$18], value=[CAST($19):VARCHAR NOT NULL], $f20=[=($17, 1)], $f21=[=($17, 2)], $f22=[=($17, 3)], $f23=[=($17, 4)]) + LogicalFilter(condition=[IS NOT NULL($19)]) + LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], _id=[$11], _index=[$12], _score=[$13], _maxscore=[$14], _sort=[$15], _routing=[$16], _row_number_transpose_=[$17], column_names=[$18], value=[CASE(=($18, 'account_number'), CAST($0):VARCHAR NOT NULL, =($18, 'firstname'), CAST($1):VARCHAR NOT NULL, =($18, 'address'), CAST($2):VARCHAR NOT NULL, =($18, 'balance'), CAST($3):VARCHAR NOT NULL, =($18, 'gender'), CAST($4):VARCHAR NOT NULL, =($18, 'city'), CAST($5):VARCHAR NOT NULL, =($18, 'employer'), CAST($6):VARCHAR NOT NULL, =($18, 'state'), CAST($7):VARCHAR NOT NULL, =($18, 'age'), CAST($8):VARCHAR NOT NULL, =($18, 'email'), CAST($9):VARCHAR NOT NULL, =($18, 'lastname'), CAST($10):VARCHAR NOT NULL, =($18, '_id'), CAST($11):VARCHAR NOT NULL, =($18, '_index'), CAST($12):VARCHAR NOT NULL, =($18, '_score'), NUMBER_TO_STRING($13), =($18, '_maxscore'), NUMBER_TO_STRING($14), =($18, '_sort'), CAST($15):VARCHAR NOT NULL, =($18, '_routing'), CAST($16):VARCHAR NOT NULL, null:NULL)]) + LogicalJoin(condition=[true], joinType=[inner]) + LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], _id=[$11], _index=[$12], _score=[$13], _maxscore=[$14], _sort=[$15], _routing=[$16], _row_number_transpose_=[ROW_NUMBER() OVER ()]) + LogicalSort(fetch=[5]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) + LogicalValues(tuples=[[{ 'account_number' }, { 'firstname' }, { 'address' }, { 'balance' }, { 'gender' }, { 'city' }, { 'employer' }, { 'state' }, { 'age' }, { 'email' }, { 'lastname' }, { '_id' }, { '_index' }, { '_score' }, { '_maxscore' }, { '_sort' }, { '_routing' }]]) physical: | EnumerableLimit(fetch=[10000]) - EnumerableAggregate(group=[{0}], row 1=[MAX($1) FILTER $2], row 2=[MAX($3) FILTER $4], row 3=[MAX($5) FILTER $6], row 4=[MAX($7) FILTER $8]) - EnumerableCalc(expr#0..18=[{inputs}], expr#19=['account_number'], expr#20=[=($t18, $t19)], expr#21=[CAST($t0):VARCHAR NOT NULL], expr#22=['firstname'], expr#23=[=($t18, $t22)], expr#24=[CAST($t1):VARCHAR NOT NULL], expr#25=['address'], expr#26=[=($t18, $t25)], expr#27=[CAST($t2):VARCHAR NOT NULL], expr#28=['balance'], expr#29=[=($t18, $t28)], expr#30=[CAST($t3):VARCHAR NOT NULL], expr#31=['gender'], expr#32=[=($t18, $t31)], expr#33=[CAST($t4):VARCHAR NOT NULL], expr#34=['city'], expr#35=[=($t18, $t34)], expr#36=[CAST($t5):VARCHAR NOT NULL], expr#37=['employer'], expr#38=[=($t18, $t37)], expr#39=[CAST($t6):VARCHAR NOT NULL], expr#40=['state'], expr#41=[=($t18, $t40)], expr#42=[CAST($t7):VARCHAR NOT NULL], expr#43=['age'], expr#44=[=($t18, $t43)], 
expr#45=[CAST($t8):VARCHAR NOT NULL], expr#46=['email'], expr#47=[=($t18, $t46)], expr#48=[CAST($t9):VARCHAR NOT NULL], expr#49=['lastname'], expr#50=[=($t18, $t49)], expr#51=[CAST($t10):VARCHAR NOT NULL], expr#52=['_id'], expr#53=[=($t18, $t52)], expr#54=[CAST($t11):VARCHAR NOT NULL], expr#55=['_index'], expr#56=[=($t18, $t55)], expr#57=[CAST($t12):VARCHAR NOT NULL], expr#58=['_score'], expr#59=[=($t18, $t58)], expr#60=[NUMBER_TO_STRING($t13)], expr#61=['_maxscore'], expr#62=[=($t18, $t61)], expr#63=[NUMBER_TO_STRING($t14)], expr#64=['_sort'], expr#65=[=($t18, $t64)], expr#66=[CAST($t15):VARCHAR NOT NULL], expr#67=['_routing'], expr#68=[=($t18, $t67)], expr#69=[CAST($t16):VARCHAR NOT NULL], expr#70=[null:NULL], expr#71=[CASE($t20, $t21, $t23, $t24, $t26, $t27, $t29, $t30, $t32, $t33, $t35, $t36, $t38, $t39, $t41, $t42, $t44, $t45, $t47, $t48, $t50, $t51, $t53, $t54, $t56, $t57, $t59, $t60, $t62, $t63, $t65, $t66, $t68, $t69, $t70)], expr#72=[CAST($t71):VARCHAR NOT NULL], expr#73=[1], expr#74=[=($t17, $t73)], expr#75=[2], expr#76=[=($t17, $t75)], expr#77=[3], expr#78=[=($t17, $t77)], expr#79=[4], expr#80=[=($t17, $t79)], column_names=[$t18], value=[$t72], $f6=[$t74], value0=[$t72], $f8=[$t76], value1=[$t72], $f10=[$t78], value2=[$t72], $f12=[$t80]) + EnumerableAggregate(group=[{0}], row 1_null=[MAX($1) FILTER $2], row 2_null=[MAX($1) FILTER $3], row 3_null=[MAX($1) FILTER $4], row 4_null=[MAX($1) FILTER $5]) + EnumerableCalc(expr#0..18=[{inputs}], expr#19=['account_number'], expr#20=[=($t18, $t19)], expr#21=[CAST($t0):VARCHAR NOT NULL], expr#22=['firstname'], expr#23=[=($t18, $t22)], expr#24=[CAST($t1):VARCHAR NOT NULL], expr#25=['address'], expr#26=[=($t18, $t25)], expr#27=[CAST($t2):VARCHAR NOT NULL], expr#28=['balance'], expr#29=[=($t18, $t28)], expr#30=[CAST($t3):VARCHAR NOT NULL], expr#31=['gender'], expr#32=[=($t18, $t31)], expr#33=[CAST($t4):VARCHAR NOT NULL], expr#34=['city'], expr#35=[=($t18, $t34)], expr#36=[CAST($t5):VARCHAR NOT NULL], expr#37=['employer'], expr#38=[=($t18, $t37)], expr#39=[CAST($t6):VARCHAR NOT NULL], expr#40=['state'], expr#41=[=($t18, $t40)], expr#42=[CAST($t7):VARCHAR NOT NULL], expr#43=['age'], expr#44=[=($t18, $t43)], expr#45=[CAST($t8):VARCHAR NOT NULL], expr#46=['email'], expr#47=[=($t18, $t46)], expr#48=[CAST($t9):VARCHAR NOT NULL], expr#49=['lastname'], expr#50=[=($t18, $t49)], expr#51=[CAST($t10):VARCHAR NOT NULL], expr#52=['_id'], expr#53=[=($t18, $t52)], expr#54=[CAST($t11):VARCHAR NOT NULL], expr#55=['_index'], expr#56=[=($t18, $t55)], expr#57=[CAST($t12):VARCHAR NOT NULL], expr#58=['_score'], expr#59=[=($t18, $t58)], expr#60=[NUMBER_TO_STRING($t13)], expr#61=['_maxscore'], expr#62=[=($t18, $t61)], expr#63=[NUMBER_TO_STRING($t14)], expr#64=['_sort'], expr#65=[=($t18, $t64)], expr#66=[CAST($t15):VARCHAR NOT NULL], expr#67=['_routing'], expr#68=[=($t18, $t67)], expr#69=[CAST($t16):VARCHAR NOT NULL], expr#70=[null:NULL], expr#71=[CASE($t20, $t21, $t23, $t24, $t26, $t27, $t29, $t30, $t32, $t33, $t35, $t36, $t38, $t39, $t41, $t42, $t44, $t45, $t47, $t48, $t50, $t51, $t53, $t54, $t56, $t57, $t59, $t60, $t62, $t63, $t65, $t66, $t68, $t69, $t70)], expr#72=[CAST($t71):VARCHAR NOT NULL], expr#73=[1], expr#74=[=($t17, $t73)], expr#75=[2], expr#76=[=($t17, $t75)], expr#77=[3], expr#78=[=($t17, $t77)], expr#79=[4], expr#80=[=($t17, $t79)], column_names=[$t18], value=[$t72], $f20=[$t74], $f21=[$t76], $f22=[$t78], $f23=[$t80]) EnumerableNestedLoopJoin(condition=[CASE(SEARCH($18, Sarg['_id':CHAR(14), '_index':CHAR(14), 'account_number', 'address':CHAR(14), 
'age':CHAR(14), 'balance':CHAR(14), 'city':CHAR(14), 'email':CHAR(14), 'employer':CHAR(14), 'firstname':CHAR(14), 'gender':CHAR(14), 'lastname':CHAR(14), 'state':CHAR(14)]:CHAR(14)), true, =($18, '_score'), IS NOT NULL(NUMBER_TO_STRING($13)), =($18, '_maxscore'), IS NOT NULL(NUMBER_TO_STRING($14)), SEARCH($18, Sarg['_routing', '_sort':CHAR(8)]:CHAR(8)), true, false)], joinType=[inner]) EnumerableWindow(window#0=[window(rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])]) CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[LIMIT->5], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":5,"timeout":"1m"}, requestedTotalSize=5, pageSize=null, startFrom=0)]) diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLTransposeTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLTransposeTest.java index 3dd0ac3ceee..79f10b90240 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLTransposeTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLTransposeTest.java @@ -20,39 +20,40 @@ public void testSimpleCountWithTranspose() { String ppl = "source=EMP | stats count() as c|transpose"; RelNode root = getRelNode(ppl); String expectedLogical = - "LogicalAggregate(group=[{0}], row 1=[MAX($1)], row 2=[MAX($2)], row 3=[MAX($3)], row" - + " 4=[MAX($4)], row 5=[MAX($5)])\n" - + " LogicalProject(column=[$2], $f5=[CASE(=($1, 1), CAST($3):VARCHAR NOT NULL," - + " null:NULL)], $f6=[CASE(=($1, 2), CAST($3):VARCHAR NOT NULL, null:NULL)]," - + " $f7=[CASE(=($1, 3), CAST($3):VARCHAR NOT NULL, null:NULL)], $f8=[CASE(=($1, 4)," - + " CAST($3):VARCHAR NOT NULL, null:NULL)], $f9=[CASE(=($1, 5), CAST($3):VARCHAR NOT" - + " NULL, null:NULL)])\n" - + " LogicalFilter(condition=[IS NOT NULL($3)])\n" - + " LogicalProject(c=[$0], __row_id__=[$1], column=[$2], value=[CASE(=($2, 'c')," - + " CAST($0):VARCHAR NOT NULL, null:NULL)])\n" - + " LogicalJoin(condition=[true], joinType=[inner])\n" - + " LogicalProject(c=[$0], __row_id__=[ROW_NUMBER() OVER ()])\n" - + " LogicalAggregate(group=[{}], c=[COUNT()])\n" - + " LogicalTableScan(table=[[scott, EMP]])\n" - + " LogicalValues(tuples=[[{ 'c' }]])\n"; + "LogicalProject(column=[$0], row 1=[$1], row 2=[$2], row 3=[$3], row 4=[$4], row 5=[$5])\n" + + " LogicalAggregate(group=[{0}], row 1_null=[MAX($1) FILTER $2], row 2_null=[MAX($1)" + + " FILTER $3], row 3_null=[MAX($1) FILTER $4], row 4_null=[MAX($1) FILTER $5], row" + + " 5_null=[MAX($1) FILTER $6])\n" + + " LogicalProject(column=[$2], value=[CAST($3):VARCHAR NOT NULL], $f4=[=($1, 1)]," + + " $f5=[=($1, 2)], $f6=[=($1, 3)], $f7=[=($1, 4)], $f8=[=($1, 5)])\n" + + " LogicalFilter(condition=[IS NOT NULL($3)])\n" + + " LogicalProject(c=[$0], _row_number_transpose_=[$1], column=[$2]," + + " value=[CASE(=($2, 'c'), CAST($0):VARCHAR NOT NULL, null:NULL)])\n" + + " LogicalJoin(condition=[true], joinType=[inner])\n" + + " LogicalProject(c=[$0], _row_number_transpose_=[ROW_NUMBER() OVER ()])\n" + + " LogicalAggregate(group=[{}], c=[COUNT()])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n" + + " LogicalValues(tuples=[[{ 'c' }]])\n"; + verifyLogical(root, expectedLogical); String expectedResult = "column=c; row 1=14; row 2=null; row 3=null; row 4=null; row 5=null\n"; verifyResult(root, expectedResult); String expectedSparkSql = - "SELECT `column`, MAX(CASE WHEN `__row_id__` = 1 THEN CAST(`value` AS STRING) ELSE NULL" - + " END) `row 1`, MAX(CASE WHEN `__row_id__` = 2 THEN CAST(`value` AS STRING) ELSE 
NULL" - + " END) `row 2`, MAX(CASE WHEN `__row_id__` = 3 THEN CAST(`value` AS STRING) ELSE NULL" - + " END) `row 3`, MAX(CASE WHEN `__row_id__` = 4 THEN CAST(`value` AS STRING) ELSE NULL" - + " END) `row 4`, MAX(CASE WHEN `__row_id__` = 5 THEN CAST(`value` AS STRING) ELSE NULL" - + " END) `row 5`\n" - + "FROM (SELECT `t0`.`c`, `t0`.`__row_id__`, `t1`.`column`, CASE WHEN `t1`.`column` =" - + " 'c' THEN CAST(`t0`.`c` AS STRING) ELSE NULL END `value`\n" - + "FROM (SELECT COUNT(*) `c`, ROW_NUMBER() OVER () `__row_id__`\n" + "SELECT `column`, MAX(CAST(`value` AS STRING)) FILTER (WHERE `_row_number_transpose_` = 1)" + + " `row 1`, MAX(CAST(`value` AS STRING)) FILTER (WHERE `_row_number_transpose_` = 2)" + + " `row 2`, MAX(CAST(`value` AS STRING)) FILTER (WHERE `_row_number_transpose_` = 3)" + + " `row 3`, MAX(CAST(`value` AS STRING)) FILTER (WHERE `_row_number_transpose_` = 4)" + + " `row 4`, MAX(CAST(`value` AS STRING)) FILTER (WHERE `_row_number_transpose_` = 5)" + + " `row 5`\n" + + "FROM (SELECT `t0`.`c`, `t0`.`_row_number_transpose_`, `t1`.`column`, CASE WHEN" + + " `t1`.`column` = 'c' THEN CAST(`t0`.`c` AS STRING) ELSE NULL END `value`\n" + + "FROM (SELECT COUNT(*) `c`, ROW_NUMBER() OVER () `_row_number_transpose_`\n" + "FROM `scott`.`EMP`) `t0`\n" + "CROSS JOIN (VALUES ('c')) `t1` (`column`)) `t2`\n" + "WHERE `t2`.`value` IS NOT NULL\n" + "GROUP BY `column`"; + verifyPPLToSparkSQL(root, expectedSparkSql); } @@ -63,26 +64,25 @@ public void testMultipleAggregatesWithAliasesTranspose() { + " as cnt|transpose "; RelNode root = getRelNode(ppl); String expectedLogical = - "LogicalAggregate(group=[{0}], row 1=[MAX($1)], row 2=[MAX($2)], row 3=[MAX($3)], row" - + " 4=[MAX($4)], row 5=[MAX($5)])\n" - + " LogicalProject(column=[$5], $f8=[CASE(=($4, 1), CAST($6):VARCHAR NOT NULL," - + " null:NULL)], $f9=[CASE(=($4, 2), CAST($6):VARCHAR NOT NULL, null:NULL)]," - + " $f10=[CASE(=($4, 3), CAST($6):VARCHAR NOT NULL, null:NULL)], $f11=[CASE(=($4, 4)," - + " CAST($6):VARCHAR NOT NULL, null:NULL)], $f12=[CASE(=($4, 5), CAST($6):VARCHAR NOT" - + " NULL, null:NULL)])\n" - + " LogicalFilter(condition=[IS NOT NULL($6)])\n" - + " LogicalProject(avg_sal=[$0], max_sal=[$1], min_sal=[$2], cnt=[$3]," - + " __row_id__=[$4], column=[$5], value=[CASE(=($5, 'avg_sal'), NUMBER_TO_STRING($0)," - + " =($5, 'max_sal'), NUMBER_TO_STRING($1), =($5, 'min_sal'), NUMBER_TO_STRING($2)," - + " =($5, 'cnt'), CAST($3):VARCHAR NOT NULL, null:NULL)])\n" - + " LogicalJoin(condition=[true], joinType=[inner])\n" - + " LogicalProject(avg_sal=[$0], max_sal=[$1], min_sal=[$2], cnt=[$3]," - + " __row_id__=[ROW_NUMBER() OVER ()])\n" - + " LogicalAggregate(group=[{}], avg_sal=[AVG($0)], max_sal=[MAX($0)]," + "LogicalProject(column=[$0], row 1=[$1], row 2=[$2], row 3=[$3], row 4=[$4], row 5=[$5])\n" + + " LogicalAggregate(group=[{0}], row 1_null=[MAX($1) FILTER $2], row 2_null=[MAX($1)" + + " FILTER $3], row 3_null=[MAX($1) FILTER $4], row 4_null=[MAX($1) FILTER $5], row" + + " 5_null=[MAX($1) FILTER $6])\n" + + " LogicalProject(column=[$5], value=[CAST($6):VARCHAR NOT NULL], $f7=[=($4, 1)]," + + " $f8=[=($4, 2)], $f9=[=($4, 3)], $f10=[=($4, 4)], $f11=[=($4, 5)])\n" + + " LogicalFilter(condition=[IS NOT NULL($6)])\n" + + " LogicalProject(avg_sal=[$0], max_sal=[$1], min_sal=[$2], cnt=[$3]," + + " _row_number_transpose_=[$4], column=[$5], value=[CASE(=($5, 'avg_sal')," + + " NUMBER_TO_STRING($0), =($5, 'max_sal'), NUMBER_TO_STRING($1), =($5, 'min_sal')," + + " NUMBER_TO_STRING($2), =($5, 'cnt'), CAST($3):VARCHAR NOT NULL, null:NULL)])\n" + + " 
LogicalJoin(condition=[true], joinType=[inner])\n" + + " LogicalProject(avg_sal=[$0], max_sal=[$1], min_sal=[$2], cnt=[$3]," + + " _row_number_transpose_=[ROW_NUMBER() OVER ()])\n" + + " LogicalAggregate(group=[{}], avg_sal=[AVG($0)], max_sal=[MAX($0)]," + " min_sal=[MIN($0)], cnt=[COUNT()])\n" - + " LogicalProject(SAL=[$5])\n" - + " LogicalTableScan(table=[[scott, EMP]])\n" - + " LogicalValues(tuples=[[{ 'avg_sal' }, { 'max_sal' }, { 'min_sal' }, {" + + " LogicalProject(SAL=[$5])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n" + + " LogicalValues(tuples=[[{ 'avg_sal' }, { 'max_sal' }, { 'min_sal' }, {" + " 'cnt' }]])\n"; verifyLogical(root, expectedLogical); String expectedResult = @@ -93,20 +93,20 @@ public void testMultipleAggregatesWithAliasesTranspose() { verifyResult(root, expectedResult); String expectedSparkSql = - "SELECT `column`, MAX(CASE WHEN `__row_id__` = 1 THEN CAST(`value` AS STRING) ELSE NULL" - + " END) `row 1`, MAX(CASE WHEN `__row_id__` = 2 THEN CAST(`value` AS STRING) ELSE NULL" - + " END) `row 2`, MAX(CASE WHEN `__row_id__` = 3 THEN CAST(`value` AS STRING) ELSE NULL" - + " END) `row 3`, MAX(CASE WHEN `__row_id__` = 4 THEN CAST(`value` AS STRING) ELSE NULL" - + " END) `row 4`, MAX(CASE WHEN `__row_id__` = 5 THEN CAST(`value` AS STRING) ELSE NULL" - + " END) `row 5`\n" + "SELECT `column`, MAX(CAST(`value` AS STRING)) FILTER (WHERE `_row_number_transpose_` = 1)" + + " `row 1`, MAX(CAST(`value` AS STRING)) FILTER (WHERE `_row_number_transpose_` = 2)" + + " `row 2`, MAX(CAST(`value` AS STRING)) FILTER (WHERE `_row_number_transpose_` = 3)" + + " `row 3`, MAX(CAST(`value` AS STRING)) FILTER (WHERE `_row_number_transpose_` = 4)" + + " `row 4`, MAX(CAST(`value` AS STRING)) FILTER (WHERE `_row_number_transpose_` = 5)" + + " `row 5`\n" + "FROM (SELECT `t1`.`avg_sal`, `t1`.`max_sal`, `t1`.`min_sal`, `t1`.`cnt`," - + " `t1`.`__row_id__`, `t2`.`column`, CASE WHEN `t2`.`column` = 'avg_sal' THEN" - + " NUMBER_TO_STRING(`t1`.`avg_sal`) WHEN `t2`.`column` = 'max_sal' THEN" + + " `t1`.`_row_number_transpose_`, `t2`.`column`, CASE WHEN `t2`.`column` = 'avg_sal'" + + " THEN NUMBER_TO_STRING(`t1`.`avg_sal`) WHEN `t2`.`column` = 'max_sal' THEN" + " NUMBER_TO_STRING(`t1`.`max_sal`) WHEN `t2`.`column` = 'min_sal' THEN" + " NUMBER_TO_STRING(`t1`.`min_sal`) WHEN `t2`.`column` = 'cnt' THEN CAST(`t1`.`cnt` AS" + " STRING) ELSE NULL END `value`\n" + "FROM (SELECT AVG(`SAL`) `avg_sal`, MAX(`SAL`) `max_sal`, MIN(`SAL`) `min_sal`," - + " COUNT(*) `cnt`, ROW_NUMBER() OVER () `__row_id__`\n" + + " COUNT(*) `cnt`, ROW_NUMBER() OVER () `_row_number_transpose_`\n" + "FROM `scott`.`EMP`) `t1`\n" + "CROSS JOIN (VALUES ('avg_sal'),\n" + "('max_sal'),\n" @@ -114,6 +114,7 @@ public void testMultipleAggregatesWithAliasesTranspose() { + "('cnt')) `t2` (`column`)) `t3`\n" + "WHERE `t3`.`value` IS NOT NULL\n" + "GROUP BY `column`"; + verifyPPLToSparkSQL(root, expectedSparkSql); } @@ -122,20 +123,24 @@ public void testTransposeWithLimit() { String ppl = "source=EMP | fields ENAME, COMM, JOB, SAL | transpose 3"; RelNode root = getRelNode(ppl); String expectedLogical = - "LogicalAggregate(group=[{0}], row 1=[MAX($1)], row 2=[MAX($2)], row 3=[MAX($3)])\n" - + " LogicalProject(column=[$5], $f8=[CASE(=($4, 1), CAST($6):VARCHAR NOT NULL," - + " null:NULL)], $f9=[CASE(=($4, 2), CAST($6):VARCHAR NOT NULL, null:NULL)]," - + " $f10=[CASE(=($4, 3), CAST($6):VARCHAR NOT NULL, null:NULL)])\n" - + " LogicalFilter(condition=[IS NOT NULL($6)])\n" - + " LogicalProject(ENAME=[$0], COMM=[$1], JOB=[$2], SAL=[$3], __row_id__=[$4]," 
- + " column=[$5], value=[CASE(=($5, 'ENAME'), CAST($0):VARCHAR NOT NULL, =($5, 'COMM')," - + " NUMBER_TO_STRING($1), =($5, 'JOB'), CAST($2):VARCHAR NOT NULL, =($5, 'SAL')," - + " NUMBER_TO_STRING($3), null:NULL)])\n" - + " LogicalJoin(condition=[true], joinType=[inner])\n" - + " LogicalProject(ENAME=[$1], COMM=[$6], JOB=[$2], SAL=[$5]," - + " __row_id__=[ROW_NUMBER() OVER ()])\n" - + " LogicalTableScan(table=[[scott, EMP]])\n" - + " LogicalValues(tuples=[[{ 'ENAME' }, { 'COMM' }, { 'JOB' }, { 'SAL' }]])\n"; + "LogicalProject(column=[$0], row 1=[$1], row 2=[$2], row 3=[$3])\n" + + " LogicalAggregate(group=[{0}], row 1_null=[MAX($1) FILTER $2], row 2_null=[MAX($1)" + + " FILTER $3], row 3_null=[MAX($1) FILTER $4])\n" + + " LogicalProject(column=[$5], value=[CAST($6):VARCHAR NOT NULL], $f7=[=($4, 1)]," + + " $f8=[=($4, 2)], $f9=[=($4, 3)])\n" + + " LogicalFilter(condition=[IS NOT NULL($6)])\n" + + " LogicalProject(ENAME=[$0], COMM=[$1], JOB=[$2], SAL=[$3]," + + " _row_number_transpose_=[$4], column=[$5], value=[CASE(=($5, 'ENAME')," + + " CAST($0):VARCHAR NOT NULL, =($5, 'COMM'), NUMBER_TO_STRING($1), =($5, 'JOB')," + + " CAST($2):VARCHAR NOT NULL, =($5, 'SAL'), NUMBER_TO_STRING($3), null:NULL)])\n" + + " LogicalJoin(condition=[true], joinType=[inner])\n" + + " LogicalProject(ENAME=[$1], COMM=[$6], JOB=[$2], SAL=[$5]," + + " _row_number_transpose_=[ROW_NUMBER() OVER ()])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n" + + " LogicalValues(tuples=[[{ 'ENAME' }, { 'COMM' }, { 'JOB' }, { 'SAL'" + + " }]])\n"; + + verifyLogical(root, expectedLogical); String expectedResult = "column=ENAME; row 1=SMITH; row 2=ALLEN; row 3=WARD\n" + "column=COMM ; row 1=null; row 2=300.00; row 3=500.00\n" @@ -144,16 +149,18 @@ public void testTransposeWithLimit() { verifyResult(root, expectedResult); String expectedSparkSql = - "SELECT `column`, MAX(CASE WHEN `__row_id__` = 1 THEN CAST(`value` AS STRING) ELSE NULL" - + " END) `row 1`, MAX(CASE WHEN `__row_id__` = 2 THEN CAST(`value` AS STRING) ELSE NULL" - + " END) `row 2`, MAX(CASE WHEN `__row_id__` = 3 THEN CAST(`value` AS STRING) ELSE NULL" - + " END) `row 3`\n" - + "FROM (SELECT `t`.`ENAME`, `t`.`COMM`, `t`.`JOB`, `t`.`SAL`, `t`.`__row_id__`," - + " `t0`.`column`, CASE WHEN `t0`.`column` = 'ENAME' THEN CAST(`t`.`ENAME` AS STRING)" - + " WHEN `t0`.`column` = 'COMM' THEN NUMBER_TO_STRING(`t`.`COMM`) WHEN `t0`.`column` =" - + " 'JOB' THEN CAST(`t`.`JOB` AS STRING) WHEN `t0`.`column` = 'SAL' THEN" - + " NUMBER_TO_STRING(`t`.`SAL`) ELSE NULL END `value`\n" - + "FROM (SELECT `ENAME`, `COMM`, `JOB`, `SAL`, ROW_NUMBER() OVER () `__row_id__`\n" + "SELECT `column`, MAX(CAST(`value` AS STRING)) FILTER (WHERE `_row_number_transpose_` = 1)" + + " `row 1`, MAX(CAST(`value` AS STRING)) FILTER (WHERE `_row_number_transpose_` = 2)" + + " `row 2`, MAX(CAST(`value` AS STRING)) FILTER (WHERE `_row_number_transpose_` = 3)" + + " `row 3`\n" + + "FROM (SELECT `t`.`ENAME`, `t`.`COMM`, `t`.`JOB`, `t`.`SAL`," + + " `t`.`_row_number_transpose_`, `t0`.`column`, CASE WHEN `t0`.`column` = 'ENAME' THEN" + + " CAST(`t`.`ENAME` AS STRING) WHEN `t0`.`column` = 'COMM' THEN" + + " NUMBER_TO_STRING(`t`.`COMM`) WHEN `t0`.`column` = 'JOB' THEN CAST(`t`.`JOB` AS" + + " STRING) WHEN `t0`.`column` = 'SAL' THEN NUMBER_TO_STRING(`t`.`SAL`) ELSE NULL END" + + " `value`\n" + + "FROM (SELECT `ENAME`, `COMM`, `JOB`, `SAL`, ROW_NUMBER() OVER ()" + + " `_row_number_transpose_`\n" + "FROM `scott`.`EMP`) `t`\n" + "CROSS JOIN (VALUES ('ENAME'),\n" + "('COMM'),\n" @@ -161,6 +168,7 @@ public void 
testTransposeWithLimit() { + "('SAL')) `t0` (`column`)) `t1`\n" + "WHERE `t1`.`value` IS NOT NULL\n" + "GROUP BY `column`"; + verifyPPLToSparkSQL(root, expectedSparkSql); } @@ -170,20 +178,23 @@ public void testTransposeWithLimitColumnName() { "source=EMP | fields ENAME, COMM, JOB, SAL | transpose 3 column_name='column_names'"; RelNode root = getRelNode(ppl); String expectedLogical = - "LogicalAggregate(group=[{0}], row 1=[MAX($1)], row 2=[MAX($2)], row 3=[MAX($3)])\n" - + " LogicalProject(column_names=[$5], $f8=[CASE(=($4, 1), CAST($6):VARCHAR NOT NULL," - + " null:NULL)], $f9=[CASE(=($4, 2), CAST($6):VARCHAR NOT NULL, null:NULL)]," - + " $f10=[CASE(=($4, 3), CAST($6):VARCHAR NOT NULL, null:NULL)])\n" - + " LogicalFilter(condition=[IS NOT NULL($6)])\n" - + " LogicalProject(ENAME=[$0], COMM=[$1], JOB=[$2], SAL=[$3], __row_id__=[$4]," - + " column_names=[$5], value=[CASE(=($5, 'ENAME'), CAST($0):VARCHAR NOT NULL, =($5," - + " 'COMM'), NUMBER_TO_STRING($1), =($5, 'JOB'), CAST($2):VARCHAR NOT NULL, =($5," - + " 'SAL'), NUMBER_TO_STRING($3), null:NULL)])\n" - + " LogicalJoin(condition=[true], joinType=[inner])\n" - + " LogicalProject(ENAME=[$1], COMM=[$6], JOB=[$2], SAL=[$5]," - + " __row_id__=[ROW_NUMBER() OVER ()])\n" - + " LogicalTableScan(table=[[scott, EMP]])\n" - + " LogicalValues(tuples=[[{ 'ENAME' }, { 'COMM' }, { 'JOB' }, { 'SAL' }]])\n"; + "LogicalProject(column_names=[$0], row 1=[$1], row 2=[$2], row 3=[$3])\n" + + " LogicalAggregate(group=[{0}], row 1_null=[MAX($1) FILTER $2], row 2_null=[MAX($1)" + + " FILTER $3], row 3_null=[MAX($1) FILTER $4])\n" + + " LogicalProject(column_names=[$5], value=[CAST($6):VARCHAR NOT NULL], $f7=[=($4," + + " 1)], $f8=[=($4, 2)], $f9=[=($4, 3)])\n" + + " LogicalFilter(condition=[IS NOT NULL($6)])\n" + + " LogicalProject(ENAME=[$0], COMM=[$1], JOB=[$2], SAL=[$3]," + + " _row_number_transpose_=[$4], column_names=[$5], value=[CASE(=($5, 'ENAME')," + + " CAST($0):VARCHAR NOT NULL, =($5, 'COMM'), NUMBER_TO_STRING($1), =($5, 'JOB')," + + " CAST($2):VARCHAR NOT NULL, =($5, 'SAL'), NUMBER_TO_STRING($3), null:NULL)])\n" + + " LogicalJoin(condition=[true], joinType=[inner])\n" + + " LogicalProject(ENAME=[$1], COMM=[$6], JOB=[$2], SAL=[$5]," + + " _row_number_transpose_=[ROW_NUMBER() OVER ()])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n" + + " LogicalValues(tuples=[[{ 'ENAME' }, { 'COMM' }, { 'JOB' }, { 'SAL'" + + " }]])\n"; + verifyLogical(root, expectedLogical); String expectedResult = "column_names=ENAME; row 1=SMITH; row 2=ALLEN; row 3=WARD\n" @@ -193,16 +204,18 @@ public void testTransposeWithLimitColumnName() { verifyResult(root, expectedResult); String expectedSparkSql = - "SELECT `column_names`, MAX(CASE WHEN `__row_id__` = 1 THEN CAST(`value` AS STRING) ELSE" - + " NULL END) `row 1`, MAX(CASE WHEN `__row_id__` = 2 THEN CAST(`value` AS STRING) ELSE" - + " NULL END) `row 2`, MAX(CASE WHEN `__row_id__` = 3 THEN CAST(`value` AS STRING) ELSE" - + " NULL END) `row 3`\n" - + "FROM (SELECT `t`.`ENAME`, `t`.`COMM`, `t`.`JOB`, `t`.`SAL`, `t`.`__row_id__`," - + " `t0`.`column_names`, CASE WHEN `t0`.`column_names` = 'ENAME' THEN CAST(`t`.`ENAME`" - + " AS STRING) WHEN `t0`.`column_names` = 'COMM' THEN NUMBER_TO_STRING(`t`.`COMM`) WHEN" - + " `t0`.`column_names` = 'JOB' THEN CAST(`t`.`JOB` AS STRING) WHEN `t0`.`column_names`" - + " = 'SAL' THEN NUMBER_TO_STRING(`t`.`SAL`) ELSE NULL END `value`\n" - + "FROM (SELECT `ENAME`, `COMM`, `JOB`, `SAL`, ROW_NUMBER() OVER () `__row_id__`\n" + "SELECT `column_names`, MAX(CAST(`value` AS STRING)) FILTER (WHERE 
`_row_number_transpose_`" + + " = 1) `row 1`, MAX(CAST(`value` AS STRING)) FILTER (WHERE `_row_number_transpose_` =" + + " 2) `row 2`, MAX(CAST(`value` AS STRING)) FILTER (WHERE `_row_number_transpose_` =" + + " 3) `row 3`\n" + + "FROM (SELECT `t`.`ENAME`, `t`.`COMM`, `t`.`JOB`, `t`.`SAL`," + + " `t`.`_row_number_transpose_`, `t0`.`column_names`, CASE WHEN `t0`.`column_names` =" + + " 'ENAME' THEN CAST(`t`.`ENAME` AS STRING) WHEN `t0`.`column_names` = 'COMM' THEN" + + " NUMBER_TO_STRING(`t`.`COMM`) WHEN `t0`.`column_names` = 'JOB' THEN CAST(`t`.`JOB`" + + " AS STRING) WHEN `t0`.`column_names` = 'SAL' THEN NUMBER_TO_STRING(`t`.`SAL`) ELSE" + + " NULL END `value`\n" + + "FROM (SELECT `ENAME`, `COMM`, `JOB`, `SAL`, ROW_NUMBER() OVER ()" + + " `_row_number_transpose_`\n" + "FROM `scott`.`EMP`) `t`\n" + "CROSS JOIN (VALUES ('ENAME'),\n" + "('COMM'),\n" @@ -210,6 +223,7 @@ public void testTransposeWithLimitColumnName() { + "('SAL')) `t0` (`column_names`)) `t1`\n" + "WHERE `t1`.`value` IS NOT NULL\n" + "GROUP BY `column_names`"; + verifyPPLToSparkSQL(root, expectedSparkSql); } } From 5f951aa6965a0733760e5e30e646d230ba892413 Mon Sep 17 00:00:00 2001 From: Asif Bashar Date: Tue, 13 Jan 2026 13:47:42 -0800 Subject: [PATCH 23/24] cross cluster test not trimming Signed-off-by: Asif Bashar --- .../org/opensearch/sql/security/CrossClusterSearchIT.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/integ-test/src/test/java/org/opensearch/sql/security/CrossClusterSearchIT.java b/integ-test/src/test/java/org/opensearch/sql/security/CrossClusterSearchIT.java index 9e1ffb0bf07..461ec0d2e99 100644 --- a/integ-test/src/test/java/org/opensearch/sql/security/CrossClusterSearchIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/security/CrossClusterSearchIT.java @@ -286,8 +286,8 @@ public void testCrossClusterTranspose() throws IOException { verifyDataRows( result, rows("firstname", "Nanette", "Hattie", "Dale"), - rows("balance", "32838", "5686", "4180"), - rows("age", "28", "36", "33")); + rows("balance ", "32838", "5686", "4180"), + rows("age ", "28", "36", "33")); } @Test From 6fce14b0cfe7aff5a27e55fe98167be22e89bea5 Mon Sep 17 00:00:00 2001 From: Asif Bashar Date: Tue, 13 Jan 2026 20:48:21 -0800 Subject: [PATCH 24/24] cross cluster test not trimming Signed-off-by: Asif Bashar --- .../org/opensearch/sql/security/CrossClusterSearchIT.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/integ-test/src/test/java/org/opensearch/sql/security/CrossClusterSearchIT.java b/integ-test/src/test/java/org/opensearch/sql/security/CrossClusterSearchIT.java index 461ec0d2e99..9e1ffb0bf07 100644 --- a/integ-test/src/test/java/org/opensearch/sql/security/CrossClusterSearchIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/security/CrossClusterSearchIT.java @@ -286,8 +286,8 @@ public void testCrossClusterTranspose() throws IOException { verifyDataRows( result, rows("firstname", "Nanette", "Hattie", "Dale"), - rows("balance ", "32838", "5686", "4180"), - rows("age ", "28", "36", "33")); + rows("balance", "32838", "5686", "4180"), + rows("age", "28", "36", "33")); } @Test