diff --git a/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java b/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java index 29042a2b36f..83191e59830 100644 --- a/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java +++ b/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java @@ -84,6 +84,7 @@ import org.opensearch.sql.ast.tree.Relation; import org.opensearch.sql.ast.tree.RelationSubquery; import org.opensearch.sql.ast.tree.Rename; +import org.opensearch.sql.ast.tree.Replace; import org.opensearch.sql.ast.tree.Reverse; import org.opensearch.sql.ast.tree.Rex; import org.opensearch.sql.ast.tree.Sort; @@ -775,6 +776,11 @@ public LogicalPlan visitCloseCursor(CloseCursor closeCursor, AnalysisContext con return new LogicalCloseCursor(closeCursor.getChild().get(0).accept(this, context)); } + @Override + public LogicalPlan visitReplace(Replace node, AnalysisContext context) { + throw getOnlyForCalciteException("Replace"); + } + @Override public LogicalPlan visitJoin(Join node, AnalysisContext context) { throw getOnlyForCalciteException("Join"); diff --git a/core/src/main/java/org/opensearch/sql/ast/AbstractNodeVisitor.java b/core/src/main/java/org/opensearch/sql/ast/AbstractNodeVisitor.java index a2d54d3ec05..f5aebb1d70d 100644 --- a/core/src/main/java/org/opensearch/sql/ast/AbstractNodeVisitor.java +++ b/core/src/main/java/org/opensearch/sql/ast/AbstractNodeVisitor.java @@ -72,6 +72,7 @@ import org.opensearch.sql.ast.tree.Relation; import org.opensearch.sql.ast.tree.RelationSubquery; import org.opensearch.sql.ast.tree.Rename; +import org.opensearch.sql.ast.tree.Replace; import org.opensearch.sql.ast.tree.Reverse; import org.opensearch.sql.ast.tree.Rex; import org.opensearch.sql.ast.tree.SPath; @@ -239,6 +240,10 @@ public T visitRename(Rename node, C context) { return visitChildren(node, context); } + public T visitReplace(Replace node, C context) { + return visitChildren(node, context); + } + public T visitEval(Eval node, C context) { return visitChildren(node, context); } diff --git a/core/src/main/java/org/opensearch/sql/ast/tree/Replace.java b/core/src/main/java/org/opensearch/sql/ast/tree/Replace.java new file mode 100644 index 00000000000..51b3533fbd8 --- /dev/null +++ b/core/src/main/java/org/opensearch/sql/ast/tree/Replace.java @@ -0,0 +1,91 @@ +package org.opensearch.sql.ast.tree; + +import com.google.common.collect.ImmutableList; +import java.util.HashSet; +import java.util.List; +import java.util.Set; +import java.util.stream.Collectors; +import lombok.EqualsAndHashCode; +import lombok.Getter; +import lombok.Setter; +import lombok.ToString; +import org.opensearch.sql.ast.AbstractNodeVisitor; +import org.opensearch.sql.ast.expression.DataType; +import org.opensearch.sql.ast.expression.Field; +import org.opensearch.sql.ast.expression.Literal; +import org.opensearch.sql.ast.expression.UnresolvedExpression; + +@Getter +@Setter +@ToString +@EqualsAndHashCode(callSuper = false) +public class Replace extends UnresolvedPlan { + private final UnresolvedExpression pattern; + private final UnresolvedExpression replacement; + private final List fieldList; + private UnresolvedPlan child; + + public Replace( + UnresolvedExpression pattern, UnresolvedExpression replacement, List fieldList) { + this.pattern = pattern; + this.replacement = replacement; + this.fieldList = fieldList; + validate(); + } + + private void validate() { + if (pattern == null) { + throw new IllegalArgumentException("Pattern expression cannot be null in Replace command"); + } + if (replacement == null) { + throw new IllegalArgumentException( + "Replacement expression cannot be null in Replace command"); + } + + // Validate pattern is a string literal + if (!(pattern instanceof Literal && ((Literal) pattern).getType() == DataType.STRING)) { + throw new IllegalArgumentException("Pattern must be a string literal in Replace command"); + } + + // Validate replacement is a string literal + if (!(replacement instanceof Literal && ((Literal) replacement).getType() == DataType.STRING)) { + throw new IllegalArgumentException("Replacement must be a string literal in Replace command"); + } + + if (fieldList == null || fieldList.isEmpty()) { + throw new IllegalArgumentException( + "Field list cannot be empty in Replace command. Use IN clause to specify the field."); + } + Set uniqueFields = new HashSet<>(); + List duplicates = + fieldList.stream() + .map(field -> field.getField().toString()) + .filter(fieldName -> !uniqueFields.add(fieldName)) + .collect(Collectors.toList()); + + if (!duplicates.isEmpty()) { + throw new IllegalArgumentException( + String.format("Duplicate fields [%s] in Replace command", String.join(", ", duplicates))); + } + } + + @Override + public Replace attach(UnresolvedPlan child) { + if (null == this.child) { + this.child = child; + } else { + this.child.attach(child); + } + return this; + } + + @Override + public List getChild() { + return this.child == null ? ImmutableList.of() : ImmutableList.of(this.child); + } + + @Override + public T accept(AbstractNodeVisitor nodeVisitor, C context) { + return nodeVisitor.visitReplace(this, context); + } +} diff --git a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java index c1d497c46f2..5464490856e 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java +++ b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java @@ -50,6 +50,7 @@ import org.apache.calcite.rex.RexNode; import org.apache.calcite.rex.RexVisitorImpl; import org.apache.calcite.rex.RexWindowBounds; +import org.apache.calcite.sql.fun.SqlLibraryOperators; import org.apache.calcite.sql.fun.SqlStdOperatorTable; import org.apache.calcite.sql.type.SqlTypeFamily; import org.apache.calcite.sql.type.SqlTypeName; @@ -109,6 +110,7 @@ import org.opensearch.sql.ast.tree.Regex; import org.opensearch.sql.ast.tree.Relation; import org.opensearch.sql.ast.tree.Rename; +import org.opensearch.sql.ast.tree.Replace; import org.opensearch.sql.ast.tree.Rex; import org.opensearch.sql.ast.tree.SPath; import org.opensearch.sql.ast.tree.Sort; @@ -135,11 +137,13 @@ import org.opensearch.sql.expression.parse.RegexCommonUtils; import org.opensearch.sql.utils.ParseUtils; import org.opensearch.sql.utils.WildcardRenameUtils; +import org.opensearch.sql.utils.WildcardReplaceUtils; public class CalciteRelNodeVisitor extends AbstractNodeVisitor { private final CalciteRexNodeVisitor rexVisitor; private final CalciteAggCallVisitor aggVisitor; + private static final String NEW_FIELD_PREFIX = "new_"; public CalciteRelNodeVisitor() { this.rexVisitor = new CalciteRexNodeVisitor(this); @@ -2136,6 +2140,62 @@ public RelNode visitValues(Values values, CalcitePlanContext context) { } } + @Override + public RelNode visitReplace(Replace node, CalcitePlanContext context) { + visitChildren(node, context); + String pattern = ((Literal) node.getPattern()).getValue().toString(); + String replacement = ((Literal) node.getReplacement()).getValue().toString(); + + // Remove quotes if present + pattern = pattern.replaceAll("^[\"']|[\"']$", ""); + replacement = replacement.replaceAll("^[\"']|[\"']$", ""); + + // Validate patterns only if wildcards are present + if (WildcardRenameUtils.isWildcardPattern(pattern) + || WildcardRenameUtils.isWildcardPattern(replacement)) { + WildcardReplaceUtils.validatePatterns(pattern, replacement); + } + + List projectList = new ArrayList<>(); + List newFieldNames = new ArrayList<>(); + // Add original fields + for (String fieldName : context.relBuilder.peek().getRowType().getFieldNames()) { + projectList.add(context.relBuilder.field(fieldName)); + newFieldNames.add(fieldName); + } + // Process fields for replacement + for (Field field : node.getFieldList()) { + String fieldName = field.getField().toString(); + RexNode fieldRef = context.relBuilder.field(fieldName); + if (WildcardRenameUtils.isWildcardPattern(pattern) + || WildcardRenameUtils.isWildcardPattern(replacement)) { + String regexPattern = WildcardReplaceUtils.convertToRegexPattern(pattern); + String regexReplacement = WildcardReplaceUtils.convertToRegexReplacement(replacement); + // Use REGEXP_REPLACE for wildcard patterns + RexNode replaceCall = + context.relBuilder.call( + SqlLibraryOperators.REGEXP_REPLACE_3, + fieldRef, + context.relBuilder.literal(regexPattern), + context.relBuilder.literal(regexReplacement)); + projectList.add(replaceCall); + } else { + System.out.println("Using REPLACE"); + // Use standard REPLACE for non-wildcard patterns + RexNode replaceCall = + context.relBuilder.call( + SqlStdOperatorTable.REPLACE, + fieldRef, + context.relBuilder.literal(pattern), + context.relBuilder.literal(replacement)); + projectList.add(replaceCall); + } + newFieldNames.add(NEW_FIELD_PREFIX + fieldName); + } + context.relBuilder.project(projectList, newFieldNames); + return context.relBuilder.peek(); + } + private void buildParseRelNode(Parse node, CalcitePlanContext context) { RexNode sourceField = rexVisitor.analyze(node.getSourceField(), context); ParseMethod parseMethod = node.getParseMethod(); diff --git a/core/src/main/java/org/opensearch/sql/utils/WildcardReplaceUtils.java b/core/src/main/java/org/opensearch/sql/utils/WildcardReplaceUtils.java new file mode 100644 index 00000000000..73a4491af72 --- /dev/null +++ b/core/src/main/java/org/opensearch/sql/utils/WildcardReplaceUtils.java @@ -0,0 +1,111 @@ +package org.opensearch.sql.utils; + +import java.util.regex.Pattern; + +/** Utility class for handling wildcard patterns in replace operations. */ +public class WildcardReplaceUtils { + + /** + * Convert wildcard pattern to regex pattern for REGEXP_REPLACE. + * + * @param pattern Pattern that may contain wildcards + * @return Regex pattern + */ + public static String convertToRegexPattern(String pattern) { + if (pattern == null || pattern.isEmpty()) { + return pattern; + } + + // If not a wildcard pattern, return as is + if (!WildcardRenameUtils.isWildcardPattern(pattern)) { + return pattern; + } + + // Check for consecutive wildcards before any substring operations + if (pattern.matches(".*\\*{2,}.*")) { + throw new IllegalArgumentException("Consecutive wildcards are not supported"); + } + + // Handle single wildcard pattern + if (pattern.equals("*")) { + return "(.*)"; + } + + // Handle different wildcard positions + if (pattern.startsWith("*") && pattern.endsWith("*")) { + // *abc* -> Pattern matches 'abc' anywhere + String middle = pattern.substring(1, pattern.length() - 1); + return "(.*)" + Pattern.quote(middle) + "(.*)"; + } else if (pattern.startsWith("*")) { + // *abc -> Pattern matches 'abc' at end + String end = pattern.substring(1); + return "(.*)" + Pattern.quote(end) + "$"; + } else if (pattern.endsWith("*")) { + // abc* -> Pattern matches 'abc' at start with explicit capture group + String start = pattern.substring(0, pattern.length() - 1); + return "^" + Pattern.quote(start) + "(.*)"; // Explicitly create capture group + } + return pattern; + } + + /** + * Convert wildcard replacement to regex replacement. Converts * to corresponding regex group + * references ($1, $2, etc.) + * + * @param replacement Replacement pattern with wildcards + * @return Regex replacement string + */ + public static String convertToRegexReplacement(String replacement) { + if (!WildcardRenameUtils.isWildcardPattern(replacement)) { + return replacement; + } + if (replacement.startsWith("*") && replacement.endsWith("*")) { + // *XYZ* -> Replacement with both prefix and suffix captured content + String middle = replacement.substring(1, replacement.length() - 1); + return "$1" + middle + "$2"; + } else if (replacement.startsWith("*")) { + // *XYZ -> Replacement with prefix captured content + String end = replacement.substring(1); + return "$1" + end; + } else if (replacement.endsWith("*")) { + // XYZ* -> Replacement with suffix captured content + String start = replacement.substring(0, replacement.length() - 1); + return start + "$1"; + } + return replacement; + } + + /** + * Validate wildcard patterns compatibility. + * + * @param pattern Source pattern + * @param replacement Replacement pattern + * @throws IllegalArgumentException if patterns are invalid + */ + public static void validatePatterns(String pattern, String replacement) { + if (WildcardRenameUtils.isWildcardPattern(pattern) + || WildcardRenameUtils.isWildcardPattern(replacement)) { + if (pattern.matches(".*\\*{2,}.*") || replacement.matches(".*\\*{2,}.*")) { + throw new IllegalArgumentException("Consecutive wildcards are not supported"); + } + } + + // If replacement has wildcard, pattern must have wildcard + if (WildcardRenameUtils.isWildcardPattern(replacement) + && !WildcardRenameUtils.isWildcardPattern(pattern)) { + throw new IllegalArgumentException( + "If replacement contains wildcard, pattern must contain wildcard"); + } + + // Check if wildcard count matches + if (WildcardRenameUtils.isWildcardPattern(replacement)) { + long patternWildcards = pattern.chars().filter(ch -> ch == '*').count(); + long replacementWildcards = replacement.chars().filter(ch -> ch == '*').count(); + + if (replacementWildcards > patternWildcards) { + throw new IllegalArgumentException( + "Number of wildcards in replacement cannot exceed number of wildcards in pattern"); + } + } + } +} diff --git a/core/src/test/java/org/opensearch/sql/utils/WildcardReplaceUtilsTest.java b/core/src/test/java/org/opensearch/sql/utils/WildcardReplaceUtilsTest.java new file mode 100644 index 00000000000..3f893496fa9 --- /dev/null +++ b/core/src/test/java/org/opensearch/sql/utils/WildcardReplaceUtilsTest.java @@ -0,0 +1,159 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.utils; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertThrows; + +import org.junit.jupiter.api.Test; + +class WildcardReplaceUtilsTest { + + @Test + void testConvertToRegexPatternNoWildcard() { + assertEquals("TEST", WildcardReplaceUtils.convertToRegexPattern("TEST")); + assertEquals("CLERK", WildcardReplaceUtils.convertToRegexPattern("CLERK")); + assertEquals("", WildcardReplaceUtils.convertToRegexPattern("")); + } + + @Test + void testConvertToRegexPatternWithWildcardEnd() { + assertEquals("^\\QCLERK\\E(.*)", WildcardReplaceUtils.convertToRegexPattern("CLERK*")); + assertEquals("^\\QTEST\\E(.*)", WildcardReplaceUtils.convertToRegexPattern("TEST*")); + } + + @Test + void testConvertToRegexPatternWithWildcardStart() { + assertEquals("(.*)\\QCLERK\\E$", WildcardReplaceUtils.convertToRegexPattern("*CLERK")); + assertEquals("(.*)\\QMAN\\E$", WildcardReplaceUtils.convertToRegexPattern("*MAN")); + } + + @Test + void testConvertToRegexPatternWithWildcardBothEnds() { + assertEquals("(.*)\\QCLERK\\E(.*)", WildcardReplaceUtils.convertToRegexPattern("*CLERK*")); + assertEquals("(.*)\\QMAN\\E(.*)", WildcardReplaceUtils.convertToRegexPattern("*MAN*")); + } + + @Test + void testConvertToRegexReplacementNoWildcard() { + assertEquals("EMPLOYEE", WildcardReplaceUtils.convertToRegexReplacement("EMPLOYEE")); + assertEquals("PERSON", WildcardReplaceUtils.convertToRegexReplacement("PERSON")); + assertEquals("", WildcardReplaceUtils.convertToRegexReplacement("")); + } + + @Test + void testConvertToRegexReplacementWithWildcardEnd() { + assertEquals("EMPLOYEE$1", WildcardReplaceUtils.convertToRegexReplacement("EMPLOYEE*")); + assertEquals("PERSON$1", WildcardReplaceUtils.convertToRegexReplacement("PERSON*")); + } + + @Test + void testConvertToRegexReplacementWithWildcardStart() { + assertEquals("$1EMPLOYEE", WildcardReplaceUtils.convertToRegexReplacement("*EMPLOYEE")); + assertEquals("$1PERSON", WildcardReplaceUtils.convertToRegexReplacement("*PERSON")); + } + + @Test + void testConvertToRegexReplacementWithWildcardBothEnds() { + assertEquals("$1EMPLOYEE$2", WildcardReplaceUtils.convertToRegexReplacement("*EMPLOYEE*")); + assertEquals("$1PERSON$2", WildcardReplaceUtils.convertToRegexReplacement("*PERSON*")); + } + + @Test + void testValidPatternsNoWildcard() { + // Should not throw any exceptions + WildcardReplaceUtils.validatePatterns("TEST", "REPLACE"); + WildcardReplaceUtils.validatePatterns("CLERK", "EMPLOYEE"); + } + + @Test + void testValidPatternsWithWildcards() { + // Valid combinations + WildcardReplaceUtils.validatePatterns("CLERK*", "EMPLOYEE*"); + WildcardReplaceUtils.validatePatterns("*MAN", "*PERSON"); + WildcardReplaceUtils.validatePatterns("*TEST*", "NEW*"); + WildcardReplaceUtils.validatePatterns("TEST*", "REPLACE"); + } + + @Test + void testInvalidConsecutiveWildcards() { + IllegalArgumentException ex = + assertThrows( + IllegalArgumentException.class, + () -> WildcardReplaceUtils.validatePatterns("CLERK**", "EMPLOYEE*")); + assertEquals("Consecutive wildcards are not supported", ex.getMessage()); + + ex = + assertThrows( + IllegalArgumentException.class, + () -> WildcardReplaceUtils.validatePatterns("CLERK*", "EMPLOYEE**")); + assertEquals("Consecutive wildcards are not supported", ex.getMessage()); + } + + @Test + void testInvalidWildcardInReplacementOnly() { + IllegalArgumentException ex = + assertThrows( + IllegalArgumentException.class, + () -> WildcardReplaceUtils.validatePatterns("CLERK", "EMPLOYEE*")); + assertEquals( + "If replacement contains wildcard, pattern must contain wildcard", ex.getMessage()); + } + + @Test + void testInvalidWildcardCount() { + IllegalArgumentException ex = + assertThrows( + IllegalArgumentException.class, + () -> WildcardReplaceUtils.validatePatterns("TEST*", "NEW*TEXT*")); + assertEquals( + "Number of wildcards in replacement cannot exceed number of wildcards in pattern", + ex.getMessage()); + } + + @Test + void testValidComplexPatterns() { + // Pattern has more wildcards than replacement + WildcardReplaceUtils.validatePatterns("*TEST*END*", "*NEW*"); + WildcardReplaceUtils.validatePatterns("*PRE*MID*", "START*END"); + WildcardReplaceUtils.validatePatterns("*START*END*", "*REPLACE"); + } + + @Test + void testSpecialCharactersInPatterns() { + assertEquals("^\\Q$TEST\\E(.*)", WildcardReplaceUtils.convertToRegexPattern("$TEST*")); + assertEquals("(.*)\\Q[TEST]\\E(.*)", WildcardReplaceUtils.convertToRegexPattern("*[TEST]*")); + assertEquals("(.*)\\Q.TEST.\\E$", WildcardReplaceUtils.convertToRegexPattern("*.TEST.")); + } + + @Test + void testEmptyPatternWithWildcard() { + // Test single wildcard pattern first + assertEquals("(.*)", WildcardReplaceUtils.convertToRegexPattern("*")); + + // Test wildcard at start + assertEquals("(.*)\\Qa\\E$", WildcardReplaceUtils.convertToRegexPattern("*a")); + + // Test wildcard at end + assertEquals("^\\Qa\\E(.*)", WildcardReplaceUtils.convertToRegexPattern("a*")); + + // Test consecutive wildcards - should throw exception + assertThrows( + IllegalArgumentException.class, () -> WildcardReplaceUtils.validatePatterns("**", "TEST")); + } + + @Test + void testEdgeCasePatterns() { + // Single character patterns + WildcardReplaceUtils.validatePatterns("a*", "b*"); + WildcardReplaceUtils.validatePatterns("*a", "*b"); + WildcardReplaceUtils.validatePatterns("*a*", "*b*"); + + // Empty patterns + WildcardReplaceUtils.validatePatterns("", ""); + WildcardReplaceUtils.validatePatterns("*", "text"); + } +} diff --git a/docs/category.json b/docs/category.json index 38c16255d03..b0c6d968b89 100644 --- a/docs/category.json +++ b/docs/category.json @@ -62,6 +62,7 @@ "user/ppl/cmd/rename.rst", "user/ppl/cmd/rex.rst", "user/ppl/cmd/stats.rst", - "user/ppl/cmd/timechart.rst" + "user/ppl/cmd/timechart.rst", + "user/ppl/cmd/replace.rst" ] } diff --git a/docs/user/ppl/cmd/replace.rst b/docs/user/ppl/cmd/replace.rst new file mode 100644 index 00000000000..300904d48d0 --- /dev/null +++ b/docs/user/ppl/cmd/replace.rst @@ -0,0 +1,107 @@ +============= +replace +============= + +.. rubric:: Table of contents + +.. contents:: + :local: + :depth: 2 + + +Description +============ +| Using ``replace`` command to replace text in one or more fields in the search result. +* The command creates new fields with *new_* prefix for replaced content (e.g., replacing text in 'country' creates 'new_country') +* If a field with *new_* prefix already exists (e.g., 'new_country'), a number will be appended to create a unique field name (e.g., 'new_country0') + + +Version +======= +3.2.0 + + +Syntax +============ +replace '' WITH '' IN [, ]... + +Note: This command is only available when Calcite engine is enabled. + +* pattern: mandatory. The text pattern you want to replace. Currently supports only plain text literals (no wildcards or regular expressions). +* replacement: mandatory. The text you want to replace with. +* field list: mandatory. One or more field names where the replacement should occur. + + +Example 1: Replace text in one field +==================================== + +The example shows replacing text in one field. + +PPL query:: + + os> source=accounts | replace "IL" WITH "Illinois" IN state | fields state, new_state; + fetched rows / total rows = 4/4 + +-------+-----------+ + | state | new_state | + |-------+-----------| + | IL | Illinois | + | TN | TN | + | VA | VA | + | MD | MD | + +-------+-----------+ + + +Example 2: Replace text in multiple fields +========================================== + +The example shows replacing text in multiple fields. + +PPL query:: + + os> source=accounts | replace "IL" WITH "Illinois" IN state, address | fields state, address, new_state, new_address; + fetched rows / total rows = 4/4 + +-------+----------------------+-----------+----------------------+ + | state | address | new_state | new_address | + |-------+----------------------+-----------+----------------------| + | IL | 880 Holmes Lane | Illinois | 880 Holmes Lane | + | TN | 671 Bristol Street | TN | 671 Bristol Street | + | VA | 789 Madison Street | VA | 789 Madison Street | + | MD | 467 Hutchinson Court | MD | 467 Hutchinson Court | + +-------+----------------------+-----------+----------------------+ + + +Example 3: Replace with IN clause and other commands +==================================================== + +The example shows using replace with other commands. + +PPL query:: + + os> source=accounts | replace "IL" WITH "Illinois" IN state | where age > 30 | fields state, age, new_state; + fetched rows / total rows = 3/3 + +-------+-----+-----------+ + | state | age | new_state | + |-------+-----+-----------| + | IL | 32 | Illinois | + | TN | 36 | TN | + | MD | 33 | MD | + +-------+-----+-----------+ + +Example 4: Pattern matching with LIKE and replace +================================================= + +Since replace command only supports plain string literals, you can use LIKE command with replace for pattern matching needs. + +PPL query:: + + os> source=accounts | where LIKE(address, '%Holmes%') | replace "Holmes" WITH "HOLMES" IN address | fields address, state, gender, age, city, new_address; + fetched rows / total rows = 1/1 + +-----------------+-------+--------+-----+--------+-----------------+ + | address | state | gender | age | city | new_address | + |-----------------+-------+--------+-----+--------+-----------------| + | 880 Holmes Lane | IL | M | 32 | Brogan | 880 HOLMES Lane | + +-----------------+-------+--------+-----+--------+-----------------+ + +Note +==== +* For each field specified in the IN clause, a new field is created with prefix *new_* containing the replaced text. The original fields remain unchanged. \ No newline at end of file diff --git a/docs/user/ppl/index.rst b/docs/user/ppl/index.rst index 09fad5c853d..76eac15273f 100644 --- a/docs/user/ppl/index.rst +++ b/docs/user/ppl/index.rst @@ -124,6 +124,8 @@ The query start with search command and then flowing a set of command delimited - `trendline command `_ + - `replace command `_ + - `where command `_ * **Functions** diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/CalciteNoPushdownIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/CalciteNoPushdownIT.java index ac6e1d9bd6c..b9ae1b1344e 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/CalciteNoPushdownIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/CalciteNoPushdownIT.java @@ -88,6 +88,7 @@ CalciteRegexCommandIT.class, CalciteRexCommandIT.class, CalciteRenameCommandIT.class, + CalciteReplaceCommandIT.class, CalciteResourceMonitorIT.class, CalciteSearchCommandIT.class, CalciteSettingsIT.class, diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteReplaceCommandIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteReplaceCommandIT.java new file mode 100644 index 00000000000..510c1424291 --- /dev/null +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteReplaceCommandIT.java @@ -0,0 +1,326 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.calcite.remote; + +import static org.opensearch.sql.legacy.TestsConstants.*; +import static org.opensearch.sql.util.MatcherUtils.*; + +import java.io.IOException; +import org.json.JSONObject; +import org.junit.Test; +import org.opensearch.sql.common.antlr.SyntaxCheckException; +import org.opensearch.sql.ppl.PPLIntegTestCase; + +public class CalciteReplaceCommandIT extends PPLIntegTestCase { + + public void init() throws Exception { + super.init(); + enableCalcite(); + disallowCalciteFallback(); + loadIndex(Index.STATE_COUNTRY); + } + + @Test + public void testReplaceWithFields() throws IOException { + JSONObject result = + executeQuery( + String.format( + "source = %s | replace 'USA' WITH 'United States' IN country | fields name, age," + + " new_country", + TEST_INDEX_STATE_COUNTRY)); + + verifySchema( + result, schema("name", "string"), schema("age", "int"), schema("new_country", "string")); + + verifyDataRows( + result, + rows("Jake", 70, "United States"), + rows("Hello", 30, "United States"), + rows("John", 25, "Canada"), + rows("Jane", 20, "Canada")); + } + + @Test + public void testMultipleReplace() throws IOException { + JSONObject result = + executeQuery( + String.format( + "source = %s | replace 'USA' WITH 'United States' IN country | replace 'Jane' WITH" + + " 'Joseph' IN name", + TEST_INDEX_STATE_COUNTRY)); + + verifySchema( + result, + schema("name", "string"), + schema("age", "int"), + schema("state", "string"), + schema("country", "string"), + schema("year", "int"), + schema("month", "int"), + schema("new_name", "string"), + schema("new_country", "string")); + + verifyDataRows( + result, + rows("Jake", "USA", "California", 4, 2023, 70, "United States", "Jake"), + rows("Hello", "USA", "New York", 4, 2023, 30, "United States", "Hello"), + rows("John", "Canada", "Ontario", 4, 2023, 25, "Canada", "John"), + rows("Jane", "Canada", "Quebec", 4, 2023, 20, "Canada", "Joseph")); + } + + @Test + public void testReplaceWithSort() throws IOException { + JSONObject result = + executeQuery( + String.format( + "source = %s | replace 'US' WITH 'United States' IN country | sort new_country", + TEST_INDEX_STATE_COUNTRY)); + + verifySchema( + result, + schema("name", "string"), + schema("age", "int"), + schema("state", "string"), + schema("country", "string"), + schema("year", "int"), + schema("month", "int"), + schema("new_country", "string")); + } + + @Test + public void testReplaceWithWhereClause() throws IOException { + JSONObject result = + executeQuery( + String.format( + "source = %s | where country = 'US' | replace 'US' WITH 'United States' IN country", + TEST_INDEX_STATE_COUNTRY)); + + verifySchema( + result, + schema("name", "string"), + schema("age", "int"), + schema("state", "string"), + schema("country", "string"), + schema("year", "int"), + schema("month", "int"), + schema("new_country", "string")); + } + + @Test + public void testEmptyStringReplacement() throws IOException { + JSONObject result = + executeQuery( + String.format( + "source = %s | replace 'USA' WITH '' IN country", TEST_INDEX_STATE_COUNTRY)); + + verifySchema( + result, + schema("name", "string"), + schema("age", "int"), + schema("state", "string"), + schema("country", "string"), + schema("year", "int"), + schema("month", "int"), + schema("new_country", "string")); + + verifyDataRows( + result, + rows("Jake", "USA", "California", 4, 2023, 70, ""), + rows("Hello", "USA", "New York", 4, 2023, 30, ""), + rows("John", "Canada", "Ontario", 4, 2023, 25, "Canada"), + rows("Jane", "Canada", "Quebec", 4, 2023, 20, "Canada")); + } + + @Test + public void testMultipleFieldsInClause() throws IOException { + JSONObject result = + executeQuery( + String.format( + "source = %s | replace 'USA' WITH 'United States' IN country,state", + TEST_INDEX_STATE_COUNTRY)); + + verifySchema( + result, + schema("name", "string"), + schema("age", "int"), + schema("state", "string"), + schema("country", "string"), + schema("year", "int"), + schema("month", "int"), + schema("new_state", "string"), + schema("new_country", "string")); + + verifyDataRows( + result, + rows("Jake", "USA", "California", 4, 2023, 70, "United States", "California"), + rows("Hello", "USA", "New York", 4, 2023, 30, "United States", "New York"), + rows("John", "Canada", "Ontario", 4, 2023, 25, "Canada", "Ontario"), + rows("Jane", "Canada", "Quebec", 4, 2023, 20, "Canada", "Quebec")); + } + + @Test + public void testReplaceNonExistentField() { + Throwable e = + assertThrowsWithReplace( + IllegalArgumentException.class, + () -> + executeQuery( + String.format( + "source = %s | replace 'USA' WITH 'United States' IN non_existent_field", + TEST_INDEX_STATE_COUNTRY))); + verifyErrorMessageContains( + e, + "field [non_existent_field] not found; input fields are: [name, country, state, month," + + " year, age, _id, _index, _score, _maxscore, _sort, _routing]"); + } + + @Test + public void testReplaceAfterFieldRemoved() { + Throwable e = + assertThrowsWithReplace( + IllegalArgumentException.class, + () -> + executeQuery( + String.format( + "source = %s | fields name, age | replace 'USA' WITH 'United States' IN" + + " country", + TEST_INDEX_STATE_COUNTRY))); + verifyErrorMessageContains(e, "field [country] not found; input fields are: [name, age]"); + } + + @Test + public void testMissingInClause() { + Throwable e = + assertThrowsWithReplace( + SyntaxCheckException.class, + () -> + executeQuery( + String.format( + "source = %s | replace 'USA' WITH 'United States'", + TEST_INDEX_STATE_COUNTRY))); + + verifyErrorMessageContains(e, "[] is not a valid term at this part of the query"); + verifyErrorMessageContains(e, "Expecting tokens: 'IN'"); + } + + @Test + public void testDuplicateFieldsInReplace() { + Throwable e = + assertThrowsWithReplace( + IllegalArgumentException.class, + () -> + executeQuery( + String.format( + "source = %s | replace 'USA' WITH 'United States' IN country, state," + + " country", + TEST_INDEX_STATE_COUNTRY))); + verifyErrorMessageContains(e, "Duplicate fields [country] in Replace command"); + } + + @Test + public void testNonStringLiteralPattern() { + Throwable e = + assertThrowsWithReplace( + SyntaxCheckException.class, + () -> + executeQuery( + String.format( + "source = %s | replace 23 WITH 'test' IN field1", + TEST_INDEX_STATE_COUNTRY))); + verifyErrorMessageContains(e, "is not a valid term at this part of the query"); + verifyErrorMessageContains(e, "Expecting tokens: DQUOTA_STRING, SQUOTA_STRING"); + } + + @Test + public void testNonStringLiteralReplacement() { + Throwable e = + assertThrowsWithReplace( + SyntaxCheckException.class, + () -> + executeQuery( + String.format( + "source = %s | replace 'test' WITH 45 IN field1", + TEST_INDEX_STATE_COUNTRY))); + verifyErrorMessageContains(e, "is not a valid term at this part of the query"); + verifyErrorMessageContains(e, "Expecting tokens: DQUOTA_STRING, SQUOTA_STRING"); + } + + @Test + public void testWildcardEndReplace() throws IOException { + JSONObject result = + executeQuery( + String.format( + "source = %s | replace 'CA*' WITH 'California*' IN state", + TEST_INDEX_STATE_COUNTRY)); + + verifySchema( + result, + schema("name", "string"), + schema("age", "int"), + schema("state", "string"), + schema("country", "string"), + schema("year", "int"), + schema("month", "int"), + schema("new_state", "string")); + + verifyDataRows( + result, + rows("Jake", "USA", "California", 4, 2023, 70, "California"), + rows("Hello", "USA", "New York", 4, 2023, 30, "New York"), + rows("John", "Canada", "Ontario", 4, 2023, 25, "Ontario"), + rows("Jane", "Canada", "Quebec", 4, 2023, 20, "Quebec")); + } + + @Test + public void testWildcardStartReplace() throws IOException { + JSONObject result = + executeQuery( + String.format( + "source = %s | replace '*York' WITH '*Jersey' IN state", TEST_INDEX_STATE_COUNTRY)); + + verifySchema( + result, + schema("name", "string"), + schema("age", "int"), + schema("state", "string"), + schema("country", "string"), + schema("year", "int"), + schema("month", "int"), + schema("new_state", "string")); + + verifyDataRows( + result, + rows("Jake", "USA", "California", 4, 2023, 70, "California"), + rows("Hello", "USA", "New York", 4, 2023, 30, "New Jersey"), + rows("John", "Canada", "Ontario", 4, 2023, 25, "Ontario"), + rows("Jane", "Canada", "Quebec", 4, 2023, 20, "Quebec")); + } + + @Test + public void testWildcardOrderOfValuesReplace() throws IOException { + JSONObject result = + executeQuery( + String.format( + "source = %s | replace '* York' WITH 'York *' IN state", TEST_INDEX_STATE_COUNTRY)); + + verifySchema( + result, + schema("name", "string"), + schema("age", "int"), + schema("state", "string"), + schema("country", "string"), + schema("year", "int"), + schema("month", "int"), + schema("new_state", "string")); + + verifyDataRows( + result, + rows("Jake", "USA", "California", 4, 2023, 70, "California"), + rows("Hello", "USA", "New York", 4, 2023, 30, "York New"), + rows("John", "Canada", "Ontario", 4, 2023, 25, "Ontario"), + rows("Jane", "Canada", "Quebec", 4, 2023, 20, "Quebec")); + } +} diff --git a/ppl/src/main/antlr/OpenSearchPPLParser.g4 b/ppl/src/main/antlr/OpenSearchPPLParser.g4 index 17193858ab6..2d6d6bd123c 100644 --- a/ppl/src/main/antlr/OpenSearchPPLParser.g4 +++ b/ppl/src/main/antlr/OpenSearchPPLParser.g4 @@ -78,6 +78,7 @@ commands | regexCommand | timechartCommand | rexCommand + | replaceCommand ; commandName @@ -115,6 +116,7 @@ commandName | REGEX | APPEND | REX + | REPLACE ; searchCommand @@ -155,6 +157,10 @@ renameCommand : RENAME renameClasue (COMMA? renameClasue)* ; +replaceCommand + : REPLACE pattern=stringLiteral WITH replacement=stringLiteral IN fieldList + ; + statsCommand : STATS (PARTITIONS EQUAL partitions = integerLiteral)? (ALLNUM EQUAL allnum = booleanLiteral)? (DELIM EQUAL delim = stringLiteral)? statsAggTerm (COMMA statsAggTerm)* (statsByClause)? (DEDUP_SPLITVALUES EQUAL dedupsplit = booleanLiteral)? ; diff --git a/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java b/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java index e27ca500633..a6df9861da3 100644 --- a/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java +++ b/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java @@ -92,6 +92,7 @@ import org.opensearch.sql.ast.tree.Regex; import org.opensearch.sql.ast.tree.Relation; import org.opensearch.sql.ast.tree.Rename; +import org.opensearch.sql.ast.tree.Replace; import org.opensearch.sql.ast.tree.Reverse; import org.opensearch.sql.ast.tree.Rex; import org.opensearch.sql.ast.tree.SPath; @@ -372,6 +373,20 @@ public UnresolvedPlan visitRenameCommand(RenameCommandContext ctx) { .collect(Collectors.toList())); } + /** Replace command. */ + @Override + public UnresolvedPlan visitReplaceCommand(OpenSearchPPLParser.ReplaceCommandContext ctx) { + UnresolvedExpression pattern = internalVisitExpression(ctx.pattern); + UnresolvedExpression replacement = internalVisitExpression(ctx.replacement); + + List fieldList = + ctx.fieldList().fieldExpression().stream() + .map(field -> (Field) internalVisitExpression(field)) + .collect(Collectors.toList()); + + return new Replace(pattern, replacement, fieldList); + } + /** Stats command. */ @Override public UnresolvedPlan visitStatsCommand(StatsCommandContext ctx) { diff --git a/ppl/src/main/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizer.java b/ppl/src/main/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizer.java index d888d451c10..ee80b8c5563 100644 --- a/ppl/src/main/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizer.java +++ b/ppl/src/main/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizer.java @@ -77,6 +77,7 @@ import org.opensearch.sql.ast.tree.Regex; import org.opensearch.sql.ast.tree.Relation; import org.opensearch.sql.ast.tree.Rename; +import org.opensearch.sql.ast.tree.Replace; import org.opensearch.sql.ast.tree.Reverse; import org.opensearch.sql.ast.tree.Rex; import org.opensearch.sql.ast.tree.Sort; @@ -267,6 +268,34 @@ public String visitRename(Rename node, String context) { return StringUtils.format("%s | rename %s", child, renames); } + @Override + public String visitReplace(Replace node, String context) { + // Get the child query string + String child = node.getChild().get(0).accept(this, context); + + // Get pattern and replacement expressions + String pattern = visitExpression(node.getPattern()); + String replacement = visitExpression(node.getReplacement()); + + // Handle field list if present + String fieldListStr = ""; + if (node.getFieldList() != null && !node.getFieldList().isEmpty()) { + fieldListStr = + " IN " + + node.getFieldList().stream() + .map(this::visitFieldExpression) + .collect(Collectors.joining(", ")); + } + + // Build the replace command string + return StringUtils.format( + "%s | replace %s WITH %s%s", child, pattern, replacement, fieldListStr); + } + + private String visitFieldExpression(Field field) { + return field.toString(); + } + /** Build {@link LogicalAggregation}. */ @Override public String visitAggregation(Aggregation node, String context) { diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLReplaceTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLReplaceTest.java new file mode 100644 index 00000000000..8c3b3b35213 --- /dev/null +++ b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLReplaceTest.java @@ -0,0 +1,403 @@ +package org.opensearch.sql.ppl.calcite; + +import org.apache.calcite.rel.RelNode; +import org.apache.calcite.test.CalciteAssert; +import org.junit.Test; + +public class CalcitePPLReplaceTest extends CalcitePPLAbstractTest { + + public CalcitePPLReplaceTest() { + super(CalciteAssert.SchemaSpec.SCOTT_WITH_TEMPORAL); + } + + @Test + public void testBasicReplace() { + String ppl = "source=EMP | replace \"CLERK\" WITH \"EMPLOYEE\" IN JOB"; + RelNode root = getRelNode(ppl); + + String expectedLogical = + "LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4], SAL=[$5]," + + " COMM=[$6], DEPTNO=[$7], new_JOB=[REPLACE($2, 'CLERK'," + + " 'EMPLOYEE')])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; + + verifyLogical(root, expectedLogical); + + String expectedSparkSql = + "SELECT `EMPNO`, `ENAME`, `JOB`, `MGR`, `HIREDATE`, `SAL`, `COMM`, `DEPTNO`, " + + "REPLACE(`JOB`, 'CLERK', 'EMPLOYEE') `new_JOB`\n" + + "FROM `scott`.`EMP`"; + + verifyPPLToSparkSQL(root, expectedSparkSql); + + String expectedResult = + "EMPNO=7369; ENAME=SMITH; JOB=CLERK; MGR=7902; HIREDATE=1980-12-17; SAL=800.00; COMM=null;" + + " DEPTNO=20; new_JOB=EMPLOYEE\n" + + "EMPNO=7499; ENAME=ALLEN; JOB=SALESMAN; MGR=7698; HIREDATE=1981-02-20; SAL=1600.00;" + + " COMM=300.00; DEPTNO=30; new_JOB=SALESMAN\n" + + "EMPNO=7521; ENAME=WARD; JOB=SALESMAN; MGR=7698; HIREDATE=1981-02-22; SAL=1250.00;" + + " COMM=500.00; DEPTNO=30; new_JOB=SALESMAN\n" + + "EMPNO=7566; ENAME=JONES; JOB=MANAGER; MGR=7839; HIREDATE=1981-02-04; SAL=2975.00;" + + " COMM=null; DEPTNO=20; new_JOB=MANAGER\n" + + "EMPNO=7654; ENAME=MARTIN; JOB=SALESMAN; MGR=7698; HIREDATE=1981-09-28; SAL=1250.00;" + + " COMM=1400.00; DEPTNO=30; new_JOB=SALESMAN\n" + + "EMPNO=7698; ENAME=BLAKE; JOB=MANAGER; MGR=7839; HIREDATE=1981-01-05; SAL=2850.00;" + + " COMM=null; DEPTNO=30; new_JOB=MANAGER\n" + + "EMPNO=7782; ENAME=CLARK; JOB=MANAGER; MGR=7839; HIREDATE=1981-06-09; SAL=2450.00;" + + " COMM=null; DEPTNO=10; new_JOB=MANAGER\n" + + "EMPNO=7788; ENAME=SCOTT; JOB=ANALYST; MGR=7566; HIREDATE=1987-04-19; SAL=3000.00;" + + " COMM=null; DEPTNO=20; new_JOB=ANALYST\n" + + "EMPNO=7839; ENAME=KING; JOB=PRESIDENT; MGR=null; HIREDATE=1981-11-17; SAL=5000.00;" + + " COMM=null; DEPTNO=10; new_JOB=PRESIDENT\n" + + "EMPNO=7844; ENAME=TURNER; JOB=SALESMAN; MGR=7698; HIREDATE=1981-09-08; SAL=1500.00;" + + " COMM=0.00; DEPTNO=30; new_JOB=SALESMAN\n" + + "EMPNO=7876; ENAME=ADAMS; JOB=CLERK; MGR=7788; HIREDATE=1987-05-23; SAL=1100.00;" + + " COMM=null; DEPTNO=20; new_JOB=EMPLOYEE\n" + + "EMPNO=7900; ENAME=JAMES; JOB=CLERK; MGR=7698; HIREDATE=1981-12-03; SAL=950.00;" + + " COMM=null; DEPTNO=30; new_JOB=EMPLOYEE\n" + + "EMPNO=7902; ENAME=FORD; JOB=ANALYST; MGR=7566; HIREDATE=1981-12-03; SAL=3000.00;" + + " COMM=null; DEPTNO=20; new_JOB=ANALYST\n" + + "EMPNO=7934; ENAME=MILLER; JOB=CLERK; MGR=7782; HIREDATE=1982-01-23; SAL=1300.00;" + + " COMM=null; DEPTNO=10; new_JOB=EMPLOYEE\n"; + + verifyResult(root, expectedResult); + } + + @Test + public void testMultipleFieldsReplace() { + String ppl = + "source=EMP | replace \"CLERK\" WITH \"EMPLOYEE\" IN JOB | replace \"20\" WITH \"RESEARCH\"" + + " IN DEPTNO"; + RelNode root = getRelNode(ppl); + + String expectedLogical = + "LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4], SAL=[$5]," + + " COMM=[$6], DEPTNO=[$7], new_JOB=[REPLACE($2, 'CLERK', 'EMPLOYEE')]," + + " new_DEPTNO=[REPLACE($7, '20', 'RESEARCH')])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; + + verifyLogical(root, expectedLogical); + + String expectedSparkSql = + "SELECT `EMPNO`, `ENAME`, `JOB`, `MGR`, `HIREDATE`, `SAL`, `COMM`, `DEPTNO`, " + + "REPLACE(`JOB`, 'CLERK', 'EMPLOYEE') `new_JOB`, " + + "REPLACE(`DEPTNO`, '20', 'RESEARCH') `new_DEPTNO`\n" + + "FROM `scott`.`EMP`"; + + verifyPPLToSparkSQL(root, expectedSparkSql); + } + + @Test + public void testReplaceSameValueInMultipleFields() { + // In EMP table, both JOB and MGR fields contain numeric values + String ppl = "source=EMP | replace \"7839\" WITH \"CEO\" IN MGR, EMPNO"; + RelNode root = getRelNode(ppl); + + String expectedLogical = + "LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4], SAL=[$5]," + + " COMM=[$6], DEPTNO=[$7], new_MGR=[REPLACE($3, '7839', 'CEO')]," + + " new_EMPNO=[REPLACE($0, '7839', 'CEO')])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; + + verifyLogical(root, expectedLogical); + + String expectedSparkSql = + "SELECT `EMPNO`, `ENAME`, `JOB`, `MGR`, `HIREDATE`, `SAL`, `COMM`, `DEPTNO`, " + + "REPLACE(`MGR`, '7839', 'CEO') `new_MGR`, " + + "REPLACE(`EMPNO`, '7839', 'CEO') `new_EMPNO`\n" + + "FROM `scott`.`EMP`"; + + verifyPPLToSparkSQL(root, expectedSparkSql); + } + + @Test + public void testReplaceWithPipeline() { + String ppl = + "source=EMP | where JOB = 'CLERK' | replace \"CLERK\" WITH \"EMPLOYEE\" IN JOB | sort SAL"; + RelNode root = getRelNode(ppl); + + String expectedLogical = + "LogicalSort(sort0=[$5], dir0=[ASC-nulls-first])\n" + + " LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4]," + + " SAL=[$5], COMM=[$6], DEPTNO=[$7], new_JOB=[REPLACE($2, 'CLERK'," + + " 'EMPLOYEE')])\n" + + " LogicalFilter(condition=[=($2, 'CLERK':VARCHAR)])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; + + verifyLogical(root, expectedLogical); + + String expectedSparkSql = + "SELECT `EMPNO`, `ENAME`, `JOB`, `MGR`, `HIREDATE`, `SAL`, `COMM`, `DEPTNO`, " + + "REPLACE(`JOB`, 'CLERK', 'EMPLOYEE') `new_JOB`\n" + + "FROM `scott`.`EMP`\n" + + "WHERE `JOB` = 'CLERK'\n" + + "ORDER BY `SAL`"; + + verifyPPLToSparkSQL(root, expectedSparkSql); + } + + @Test + public void testWildcardEndReplace() { + String ppl = "source=EMP | replace \"CLERK*\" WITH \"EMPLOYEE*\" IN JOB"; + RelNode root = getRelNode(ppl); + + String expectedLogical = + "LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4], SAL=[$5], " + + "COMM=[$6], DEPTNO=[$7], new_JOB=[REGEXP_REPLACE($2, '^\\QCLERK\\E(.*)', " + + "'EMPLOYEE$1')])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; + + verifyLogical(root, expectedLogical); + + String expectedSparkSql = + "SELECT `EMPNO`, `ENAME`, `JOB`, `MGR`, `HIREDATE`, `SAL`, `COMM`, `DEPTNO`, " + + "REGEXP_REPLACE(`JOB`, '^\\QCLERK\\E(.*)', 'EMPLOYEE$1') `new_JOB`\n" + + "FROM `scott`.`EMP`"; + + verifyPPLToSparkSQL(root, expectedSparkSql); + + String expectedResult = + "EMPNO=7369; ENAME=SMITH; JOB=CLERK; MGR=7902; HIREDATE=1980-12-17; SAL=800.00; COMM=null; " + + "DEPTNO=20; new_JOB=EMPLOYEE\n" + + "EMPNO=7499; ENAME=ALLEN; JOB=SALESMAN; MGR=7698; HIREDATE=1981-02-20; SAL=1600.00; " + + "COMM=300.00; DEPTNO=30; new_JOB=SALESMAN\n" + + "EMPNO=7521; ENAME=WARD; JOB=SALESMAN; MGR=7698; HIREDATE=1981-02-22; SAL=1250.00; " + + "COMM=500.00; DEPTNO=30; new_JOB=SALESMAN\n" + + "EMPNO=7566; ENAME=JONES; JOB=MANAGER; MGR=7839; HIREDATE=1981-02-04; SAL=2975.00; " + + "COMM=null; DEPTNO=20; new_JOB=MANAGER\n" + + "EMPNO=7654; ENAME=MARTIN; JOB=SALESMAN; MGR=7698; HIREDATE=1981-09-28; SAL=1250.00; " + + "COMM=1400.00; DEPTNO=30; new_JOB=SALESMAN\n" + + "EMPNO=7698; ENAME=BLAKE; JOB=MANAGER; MGR=7839; HIREDATE=1981-01-05; SAL=2850.00; " + + "COMM=null; DEPTNO=30; new_JOB=MANAGER\n" + + "EMPNO=7782; ENAME=CLARK; JOB=MANAGER; MGR=7839; HIREDATE=1981-06-09; SAL=2450.00; " + + "COMM=null; DEPTNO=10; new_JOB=MANAGER\n" + + "EMPNO=7788; ENAME=SCOTT; JOB=ANALYST; MGR=7566; HIREDATE=1987-04-19; SAL=3000.00; " + + "COMM=null; DEPTNO=20; new_JOB=ANALYST\n" + + "EMPNO=7839; ENAME=KING; JOB=PRESIDENT; MGR=null; HIREDATE=1981-11-17; SAL=5000.00; " + + "COMM=null; DEPTNO=10; new_JOB=PRESIDENT\n" + + "EMPNO=7844; ENAME=TURNER; JOB=SALESMAN; MGR=7698; HIREDATE=1981-09-08; SAL=1500.00; " + + "COMM=0.00; DEPTNO=30; new_JOB=SALESMAN\n" + + "EMPNO=7876; ENAME=ADAMS; JOB=CLERK; MGR=7788; HIREDATE=1987-05-23; SAL=1100.00; " + + "COMM=null; DEPTNO=20; new_JOB=EMPLOYEE\n" + + "EMPNO=7900; ENAME=JAMES; JOB=CLERK; MGR=7698; HIREDATE=1981-12-03; SAL=950.00; " + + "COMM=null; DEPTNO=30; new_JOB=EMPLOYEE\n" + + "EMPNO=7902; ENAME=FORD; JOB=ANALYST; MGR=7566; HIREDATE=1981-12-03; SAL=3000.00; " + + "COMM=null; DEPTNO=20; new_JOB=ANALYST\n" + + "EMPNO=7934; ENAME=MILLER; JOB=CLERK; MGR=7782; HIREDATE=1982-01-23; SAL=1300.00; " + + "COMM=null; DEPTNO=10; new_JOB=EMPLOYEE\n"; + + verifyResult(root, expectedResult); + } + + @Test + public void testWildcardStartReplace() { + String ppl = "source=EMP | replace \"*MAN\" WITH \"*PERSON\" IN JOB"; + RelNode root = getRelNode(ppl); + + String expectedLogical = + "LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4], SAL=[$5], " + + "COMM=[$6], DEPTNO=[$7], new_JOB=[REGEXP_REPLACE($2, '(.*)\\QMAN\\E$', " + + "'$1PERSON')])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; + + verifyLogical(root, expectedLogical); + + String expectedSparkSql = + "SELECT `EMPNO`, `ENAME`, `JOB`, `MGR`, `HIREDATE`, `SAL`, `COMM`, `DEPTNO`, " + + "REGEXP_REPLACE(`JOB`, '(.*)\\QMAN\\E$', '$1PERSON') `new_JOB`\n" + + "FROM `scott`.`EMP`"; + + verifyPPLToSparkSQL(root, expectedSparkSql); + + String expectedResult = + "EMPNO=7369; ENAME=SMITH; JOB=CLERK; MGR=7902; HIREDATE=1980-12-17; SAL=800.00; COMM=null; " + + "DEPTNO=20; new_JOB=CLERK\n" + + "EMPNO=7499; ENAME=ALLEN; JOB=SALESMAN; MGR=7698; HIREDATE=1981-02-20; SAL=1600.00; " + + "COMM=300.00; DEPTNO=30; new_JOB=SALESPERSON\n" + + "EMPNO=7521; ENAME=WARD; JOB=SALESMAN; MGR=7698; HIREDATE=1981-02-22; SAL=1250.00; " + + "COMM=500.00; DEPTNO=30; new_JOB=SALESPERSON\n" + + "EMPNO=7566; ENAME=JONES; JOB=MANAGER; MGR=7839; HIREDATE=1981-02-04; SAL=2975.00; " + + "COMM=null; DEPTNO=20; new_JOB=MANAGER\n" + + "EMPNO=7654; ENAME=MARTIN; JOB=SALESMAN; MGR=7698; HIREDATE=1981-09-28; SAL=1250.00; " + + "COMM=1400.00; DEPTNO=30; new_JOB=SALESPERSON\n" + + "EMPNO=7698; ENAME=BLAKE; JOB=MANAGER; MGR=7839; HIREDATE=1981-01-05; SAL=2850.00; " + + "COMM=null; DEPTNO=30; new_JOB=MANAGER\n" + + "EMPNO=7782; ENAME=CLARK; JOB=MANAGER; MGR=7839; HIREDATE=1981-06-09; SAL=2450.00; " + + "COMM=null; DEPTNO=10; new_JOB=MANAGER\n" + + "EMPNO=7788; ENAME=SCOTT; JOB=ANALYST; MGR=7566; HIREDATE=1987-04-19; SAL=3000.00; " + + "COMM=null; DEPTNO=20; new_JOB=ANALYST\n" + + "EMPNO=7839; ENAME=KING; JOB=PRESIDENT; MGR=null; HIREDATE=1981-11-17; SAL=5000.00; " + + "COMM=null; DEPTNO=10; new_JOB=PRESIDENT\n" + + "EMPNO=7844; ENAME=TURNER; JOB=SALESMAN; MGR=7698; HIREDATE=1981-09-08; SAL=1500.00; " + + "COMM=0.00; DEPTNO=30; new_JOB=SALESPERSON\n" + + "EMPNO=7876; ENAME=ADAMS; JOB=CLERK; MGR=7788; HIREDATE=1987-05-23; SAL=1100.00; " + + "COMM=null; DEPTNO=20; new_JOB=CLERK\n" + + "EMPNO=7900; ENAME=JAMES; JOB=CLERK; MGR=7698; HIREDATE=1981-12-03; SAL=950.00; " + + "COMM=null; DEPTNO=30; new_JOB=CLERK\n" + + "EMPNO=7902; ENAME=FORD; JOB=ANALYST; MGR=7566; HIREDATE=1981-12-03; SAL=3000.00; " + + "COMM=null; DEPTNO=20; new_JOB=ANALYST\n" + + "EMPNO=7934; ENAME=MILLER; JOB=CLERK; MGR=7782; HIREDATE=1982-01-23; SAL=1300.00; " + + "COMM=null; DEPTNO=10; new_JOB=CLERK\n"; + + verifyResult(root, expectedResult); + } + + @Test + public void testWildcardBothEndsReplace() { + String ppl = "source=EMP | replace \"*MAN*\" WITH \"PERSON*\" IN JOB"; + RelNode root = getRelNode(ppl); + + String expectedLogical = + "LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4], SAL=[$5], " + + "COMM=[$6], DEPTNO=[$7], new_JOB=[REGEXP_REPLACE($2, '(.*)\\QMAN\\E(.*)', " + + "'PERSON$1')])\n" + + // Changed to $1 as per implementation + " LogicalTableScan(table=[[scott, EMP]])\n"; + + verifyLogical(root, expectedLogical); + + String expectedSparkSql = + "SELECT `EMPNO`, `ENAME`, `JOB`, `MGR`, `HIREDATE`, `SAL`, `COMM`, `DEPTNO`, " + + "REGEXP_REPLACE(`JOB`, '(.*)\\QMAN\\E(.*)', 'PERSON$1') `new_JOB`\n" + + // Changed to $1 + "FROM `scott`.`EMP`"; + + verifyPPLToSparkSQL(root, expectedSparkSql); + + String expectedResult = + "EMPNO=7369; ENAME=SMITH; JOB=CLERK; MGR=7902; HIREDATE=1980-12-17; SAL=800.00; COMM=null; " + + "DEPTNO=20; new_JOB=CLERK\n" + + "EMPNO=7499; ENAME=ALLEN; JOB=SALESMAN; MGR=7698; HIREDATE=1981-02-20; SAL=1600.00; " + + "COMM=300.00; DEPTNO=30; new_JOB=PERSONSALES\n" + + // PERSON + content before MAN + "EMPNO=7521; ENAME=WARD; JOB=SALESMAN; MGR=7698; HIREDATE=1981-02-22; SAL=1250.00; " + + "COMM=500.00; DEPTNO=30; new_JOB=PERSONSALES\n" + + "EMPNO=7566; ENAME=JONES; JOB=MANAGER; MGR=7839; HIREDATE=1981-02-04; SAL=2975.00; " + + "COMM=null; DEPTNO=20; new_JOB=PERSON\n" + + // PERSON + nothing before MAN + "EMPNO=7654; ENAME=MARTIN; JOB=SALESMAN; MGR=7698; HIREDATE=1981-09-28; SAL=1250.00; " + + "COMM=1400.00; DEPTNO=30; new_JOB=PERSONSALES\n" + + "EMPNO=7698; ENAME=BLAKE; JOB=MANAGER; MGR=7839; HIREDATE=1981-01-05; SAL=2850.00; " + + "COMM=null; DEPTNO=30; new_JOB=PERSON\n" + + "EMPNO=7782; ENAME=CLARK; JOB=MANAGER; MGR=7839; HIREDATE=1981-06-09; SAL=2450.00; " + + "COMM=null; DEPTNO=10; new_JOB=PERSON\n" + + "EMPNO=7788; ENAME=SCOTT; JOB=ANALYST; MGR=7566; HIREDATE=1987-04-19; SAL=3000.00; " + + "COMM=null; DEPTNO=20; new_JOB=ANALYST\n" + + "EMPNO=7839; ENAME=KING; JOB=PRESIDENT; MGR=null; HIREDATE=1981-11-17; SAL=5000.00; " + + "COMM=null; DEPTNO=10; new_JOB=PRESIDENT\n" + + "EMPNO=7844; ENAME=TURNER; JOB=SALESMAN; MGR=7698; HIREDATE=1981-09-08; SAL=1500.00; " + + "COMM=0.00; DEPTNO=30; new_JOB=PERSONSALES\n" + + "EMPNO=7876; ENAME=ADAMS; JOB=CLERK; MGR=7788; HIREDATE=1987-05-23; SAL=1100.00; " + + "COMM=null; DEPTNO=20; new_JOB=CLERK\n" + + "EMPNO=7900; ENAME=JAMES; JOB=CLERK; MGR=7698; HIREDATE=1981-12-03; SAL=950.00; " + + "COMM=null; DEPTNO=30; new_JOB=CLERK\n" + + "EMPNO=7902; ENAME=FORD; JOB=ANALYST; MGR=7566; HIREDATE=1981-12-03; SAL=3000.00; " + + "COMM=null; DEPTNO=20; new_JOB=ANALYST\n" + + "EMPNO=7934; ENAME=MILLER; JOB=CLERK; MGR=7782; HIREDATE=1982-01-23; SAL=1300.00; " + + "COMM=null; DEPTNO=10; new_JOB=CLERK\n"; + + verifyResult(root, expectedResult); + } + + @Test + public void testMultipleWildcardReplace() { + String ppl = + "source=EMP | replace \"CLERK*\" WITH \"EMPLOYEE*\" IN JOB | " + + "replace \"*ER\" WITH \"*OR\" IN ENAME"; + RelNode root = getRelNode(ppl); + + String expectedLogical = + "LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4], SAL=[$5], " + + "COMM=[$6], DEPTNO=[$7], new_JOB=[REGEXP_REPLACE($2, '^\\QCLERK\\E(.*)', " + + "'EMPLOYEE$1')], new_ENAME=[REGEXP_REPLACE($1, '(.*)\\QER\\E$', " + + "'$1OR')])\n" + + // Changed to match ending ER pattern + " LogicalTableScan(table=[[scott, EMP]])\n"; + + verifyLogical(root, expectedLogical); + + String expectedSparkSql = + "SELECT `EMPNO`, `ENAME`, `JOB`, `MGR`, `HIREDATE`, `SAL`, `COMM`, `DEPTNO`, " + + "REGEXP_REPLACE(`JOB`, '^\\QCLERK\\E(.*)', 'EMPLOYEE$1') `new_JOB`, " + + "REGEXP_REPLACE(`ENAME`, '(.*)\\QER\\E$', '$1OR') `new_ENAME`\n" + + // Changed to match ending ER + "FROM `scott`.`EMP`"; + + verifyPPLToSparkSQL(root, expectedSparkSql); + + String expectedResult = + "EMPNO=7369; ENAME=SMITH; JOB=CLERK; MGR=7902; HIREDATE=1980-12-17; SAL=800.00; COMM=null; " + + "DEPTNO=20; new_JOB=EMPLOYEE; new_ENAME=SMITH\n" + + "EMPNO=7499; ENAME=ALLEN; JOB=SALESMAN; MGR=7698; HIREDATE=1981-02-20; SAL=1600.00; " + + "COMM=300.00; DEPTNO=30; new_JOB=SALESMAN; new_ENAME=ALLEN\n" + + "EMPNO=7521; ENAME=WARD; JOB=SALESMAN; MGR=7698; HIREDATE=1981-02-22; SAL=1250.00; " + + "COMM=500.00; DEPTNO=30; new_JOB=SALESMAN; new_ENAME=WARD\n" + + "EMPNO=7566; ENAME=JONES; JOB=MANAGER; MGR=7839; HIREDATE=1981-02-04; SAL=2975.00; " + + "COMM=null; DEPTNO=20; new_JOB=MANAGER; new_ENAME=JONES\n" + + "EMPNO=7654; ENAME=MARTIN; JOB=SALESMAN; MGR=7698; HIREDATE=1981-09-28; SAL=1250.00; " + + "COMM=1400.00; DEPTNO=30; new_JOB=SALESMAN; new_ENAME=MARTIN\n" + + "EMPNO=7698; ENAME=BLAKE; JOB=MANAGER; MGR=7839; HIREDATE=1981-01-05; SAL=2850.00; " + + "COMM=null; DEPTNO=30; new_JOB=MANAGER; new_ENAME=BLAKE\n" + + "EMPNO=7782; ENAME=CLARK; JOB=MANAGER; MGR=7839; HIREDATE=1981-06-09; SAL=2450.00; " + + "COMM=null; DEPTNO=10; new_JOB=MANAGER; new_ENAME=CLARK\n" + + "EMPNO=7788; ENAME=SCOTT; JOB=ANALYST; MGR=7566; HIREDATE=1987-04-19; SAL=3000.00; " + + "COMM=null; DEPTNO=20; new_JOB=ANALYST; new_ENAME=SCOTT\n" + + "EMPNO=7839; ENAME=KING; JOB=PRESIDENT; MGR=null; HIREDATE=1981-11-17; SAL=5000.00; " + + "COMM=null; DEPTNO=10; new_JOB=PRESIDENT; new_ENAME=KING\n" + + "EMPNO=7844; ENAME=TURNER; JOB=SALESMAN; MGR=7698; HIREDATE=1981-09-08; SAL=1500.00; " + + "COMM=0.00; DEPTNO=30; new_JOB=SALESMAN; new_ENAME=TURNOR\n" + + // TURNER -> TURNOR + "EMPNO=7876; ENAME=ADAMS; JOB=CLERK; MGR=7788; HIREDATE=1987-05-23; SAL=1100.00; " + + "COMM=null; DEPTNO=20; new_JOB=EMPLOYEE; new_ENAME=ADAMS\n" + + "EMPNO=7900; ENAME=JAMES; JOB=CLERK; MGR=7698; HIREDATE=1981-12-03; SAL=950.00; " + + "COMM=null; DEPTNO=30; new_JOB=EMPLOYEE; new_ENAME=JAMES\n" + + "EMPNO=7902; ENAME=FORD; JOB=ANALYST; MGR=7566; HIREDATE=1981-12-03; SAL=3000.00; " + + "COMM=null; DEPTNO=20; new_JOB=ANALYST; new_ENAME=FORD\n" + + "EMPNO=7934; ENAME=MILLER; JOB=CLERK; MGR=7782; HIREDATE=1982-01-23; SAL=1300.00; " + + "COMM=null; DEPTNO=10; new_JOB=EMPLOYEE; new_ENAME=MILLOR\n"; // MILLER -> MILLOR + + verifyResult(root, expectedResult); + } + + @Test(expected = Exception.class) + public void testReplaceWithoutWithKeywordShouldFail() { + String ppl = "source=EMP | replace \"CLERK\" \"EMPLOYEE\" IN JOB"; + getRelNode(ppl); + } + + @Test(expected = Exception.class) + public void testReplaceWithoutInKeywordShouldFail() { + String ppl = "source=EMP | replace \"CLERK\" WITH \"EMPLOYEE\" JOB"; + getRelNode(ppl); + } + + @Test(expected = RuntimeException.class) + public void testReplaceWithExpressionShouldFail() { + String ppl = "source=EMP | replace EMPNO + 1 WITH \"EMPLOYEE\" IN JOB"; + getRelNode(ppl); + } + + @Test(expected = Exception.class) + public void testReplaceWithInvalidFieldShouldFail() { + String ppl = "source=EMP | replace \"CLERK\" WITH \"EMPLOYEE\" IN INVALID_FIELD"; + getRelNode(ppl); + } + + @Test(expected = Exception.class) + public void testReplaceWithMultipleInKeywordsShouldFail() { + String ppl = "source=EMP | replace \"CLERK\" WITH \"EMPLOYEE\" IN JOB IN ENAME"; + getRelNode(ppl); + } + + @Test(expected = Exception.class) + public void testReplaceWithMissingQuotesShouldFail() { + String ppl = "source=EMP | replace CLERK WITH EMPLOYEE IN JOB"; + getRelNode(ppl); + } + + @Test(expected = Exception.class) + public void testReplaceWithMissingReplacementValueShouldFail() { + String ppl = "source=EMP | replace \"CLERK\" WITH IN JOB"; + getRelNode(ppl); + } +} diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizerTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizerTest.java index c805e5a5dfb..6a2217202a2 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizerTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizerTest.java @@ -546,6 +546,13 @@ public void testGrok() { anonymize("source=t | grok email '.+@%{HOSTNAME:host}' | fields email, host")); } + @Test + public void testReplaceCommand() { + assertEquals( + "source=EMP | replace *** WITH *** IN Field(field=fieldname, fieldArgs=[])", + anonymize("source=EMP | replace \"value\" WITH \"newvalue\" IN fieldname")); + } + @Test public void testPatterns() { when(settings.getSettingValue(Key.PATTERN_METHOD)).thenReturn("SIMPLE_PATTERN");