Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,7 @@
import org.opensearch.sql.ast.tree.Patterns;
import org.opensearch.sql.ast.tree.Project;
import org.opensearch.sql.ast.tree.RareTopN;
import org.opensearch.sql.ast.tree.Regex;
import org.opensearch.sql.ast.tree.Relation;
import org.opensearch.sql.ast.tree.RelationSubquery;
import org.opensearch.sql.ast.tree.Rename;
Expand Down Expand Up @@ -737,6 +738,11 @@ public LogicalPlan visitReverse(Reverse node, AnalysisContext context) {
throw getOnlyForCalciteException("Reverse");
}

/**
 * Rejects the regex command on this analyzer: it unconditionally throws the exception built by
 * {@code getOnlyForCalciteException("Regex")} and never produces a logical plan.
 */
@Override
public LogicalPlan visitRegex(Regex node, AnalysisContext context) {
throw getOnlyForCalciteException("Regex");
}

@Override
public LogicalPlan visitPaginate(Paginate paginate, AnalysisContext context) {
LogicalPlan child = paginate.getChild().get(0).accept(this, context);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,7 @@
import org.opensearch.sql.ast.tree.Patterns;
import org.opensearch.sql.ast.tree.Project;
import org.opensearch.sql.ast.tree.RareTopN;
import org.opensearch.sql.ast.tree.Regex;
import org.opensearch.sql.ast.tree.Relation;
import org.opensearch.sql.ast.tree.RelationSubquery;
import org.opensearch.sql.ast.tree.Rename;
Expand Down Expand Up @@ -254,6 +255,10 @@ public T visitReverse(Reverse node, C context) {
return visitChildren(node, context);
}

/**
 * Visits a {@link Regex} plan node. This default implementation only delegates to
 * {@code visitChildren}; subclasses override it when they need regex-specific handling.
 */
public T visitRegex(Regex node, C context) {
return visitChildren(node, context);
}

/**
 * Visits a {@link LambdaFunction} node. This default implementation only delegates to
 * {@code visitChildren} and returns whatever that call returns.
 */
public T visitLambdaFunction(LambdaFunction node, C context) {
return visitChildren(node, context);
}
Expand Down
55 changes: 55 additions & 0 deletions core/src/main/java/org/opensearch/sql/ast/tree/Regex.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
/*
* Copyright OpenSearch Contributors
* SPDX-License-Identifier: Apache-2.0
*/

package org.opensearch.sql.ast.tree;

import com.google.common.collect.ImmutableList;
import java.util.List;
import lombok.EqualsAndHashCode;
import lombok.Getter;
import lombok.Setter;
import lombok.ToString;
import org.opensearch.sql.ast.AbstractNodeVisitor;
import org.opensearch.sql.ast.expression.Literal;
import org.opensearch.sql.ast.expression.UnresolvedExpression;

/**
 * AST node for the regex command: holds the expression being tested, the pattern literal it is
 * tested against, and whether the match result is negated. The node has at most one child plan,
 * supplied through {@link #attach(UnresolvedPlan)} or the generated setter.
 */
@Getter
@ToString
@EqualsAndHashCode(callSuper = false)
public class Regex extends UnresolvedPlan {
  /** Operator text {@code "="}; presumably selects the non-negated form (confirm in the parser). */
  public static final String EQUALS_OPERATOR = "=";

  /** Operator text {@code "!="}; presumably selects the negated form (confirm in the parser). */
  public static final String NOT_EQUALS_OPERATOR = "!=";

  /** Expression whose value is matched against {@link #pattern}. */
  private final UnresolvedExpression field;

  /** When {@code true}, the result of the match is inverted. */
  private final boolean negated;

  /** Literal carrying the regular expression. */
  private final Literal pattern;

  /** Upstream plan feeding this node; {@code null} until one is attached. */
  @Setter private UnresolvedPlan child;

  /**
   * Creates a regex node without a child plan.
   *
   * @param field expression to match
   * @param negated whether the match result is inverted
   * @param pattern literal holding the regular expression
   */
  public Regex(UnresolvedExpression field, boolean negated, Literal pattern) {
    this.pattern = pattern;
    this.negated = negated;
    this.field = field;
  }

  /**
   * Stores the given plan as this node's child.
   *
   * @param child plan to attach
   * @return this node, to allow chaining
   */
  @Override
  public Regex attach(UnresolvedPlan child) {
    this.child = child;
    return this;
  }

  /**
   * Returns the child plan as a list.
   *
   * @return an empty list when no child is set, otherwise a single-element list
   */
  @Override
  public List<UnresolvedPlan> getChild() {
    if (this.child != null) {
      return ImmutableList.of(this.child);
    }
    return ImmutableList.of();
  }

  /** Dispatches to {@link AbstractNodeVisitor#visitRegex}. */
  @Override
  public <T, C> T accept(AbstractNodeVisitor<T, C> nodeVisitor, C context) {
    return nodeVisitor.visitRegex(this, context);
  }
}
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@
import org.apache.calcite.rex.RexNode;
import org.apache.calcite.rex.RexWindowBounds;
import org.apache.calcite.sql.fun.SqlStdOperatorTable;
import org.apache.calcite.sql.type.SqlTypeFamily;
import org.apache.calcite.sql.type.SqlTypeName;
import org.apache.calcite.tools.RelBuilder;
import org.apache.calcite.tools.RelBuilder.AggCall;
Expand Down Expand Up @@ -99,6 +100,7 @@
import org.opensearch.sql.ast.tree.Patterns;
import org.opensearch.sql.ast.tree.Project;
import org.opensearch.sql.ast.tree.RareTopN;
import org.opensearch.sql.ast.tree.Regex;
import org.opensearch.sql.ast.tree.Relation;
import org.opensearch.sql.ast.tree.Rename;
import org.opensearch.sql.ast.tree.Sort;
Expand Down Expand Up @@ -173,6 +175,32 @@ public RelNode visitFilter(Filter node, CalcitePlanContext context) {
return context.relBuilder.peek();
}

/**
 * Translates a {@link Regex} node into a filter on the current relational builder.
 *
 * <p>The children are visited first, then the field and pattern are analyzed into Rex nodes. The
 * field must belong to the CHARACTER type family. The filter condition is a
 * {@code REGEXP_CONTAINS(field, pattern)} call, wrapped in {@code NOT} when the node is negated.
 *
 * @param node regex node to translate
 * @param context planning context that provides the Rex and Rel builders
 * @return the node on top of the rel builder after the filter has been applied
 * @throws IllegalArgumentException if the analyzed field is not of a character type
 */
@Override
public RelNode visitRegex(Regex node, CalcitePlanContext context) {
  visitChildren(node, context);

  final RexNode target = rexVisitor.analyze(node.getField(), context);
  final RexNode regexLiteral = rexVisitor.analyze(node.getPattern(), context);

  // Only string-typed fields can be matched against a regular expression.
  final boolean isCharacterField = SqlTypeFamily.CHARACTER.contains(target.getType());
  if (!isCharacterField) {
    final String message =
        String.format(
            "Regex command requires field of string type, but got %s for field '%s'",
            target.getType().getSqlTypeName(), node.getField().toString());
    throw new IllegalArgumentException(message);
  }

  final RexNode containsMatch =
      context.rexBuilder.makeCall(
          org.apache.calcite.sql.fun.SqlLibraryOperators.REGEXP_CONTAINS, target, regexLiteral);
  final RexNode predicate =
      node.isNegated()
          ? context.rexBuilder.makeCall(SqlStdOperatorTable.NOT, containsMatch)
          : containsMatch;

  context.relBuilder.filter(predicate);
  return context.relBuilder.peek();
}

private boolean containsSubqueryExpression(Node expr) {
if (expr == null) {
return false;
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@
/*
* Copyright OpenSearch Contributors
* SPDX-License-Identifier: Apache-2.0
*/

package org.opensearch.sql.expression.parse;

import com.google.common.collect.ImmutableList;
import java.util.Collections;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.regex.PatternSyntaxException;

/**
 * Common utilities for regex operations. Provides a bounded cache of compiled patterns plus
 * helpers for partial matching and named-group extraction.
 *
 * <p>All members are static; the class is not meant to be instantiated.
 */
public class RegexCommonUtils {

  /** Matches the opening of a named capturing group, e.g. {@code (?<name>}, capturing the name. */
  private static final Pattern NAMED_GROUP_PATTERN =
      Pattern.compile("\\(\\?<([a-zA-Z][a-zA-Z0-9]*)>");

  /** Maximum number of compiled patterns kept in {@link #patternCache}. */
  private static final int MAX_CACHE_SIZE = 1000;

  /**
   * Access-ordered map wrapped in a synchronized view. Once the size exceeds
   * {@link #MAX_CACHE_SIZE} the eldest (least recently accessed) entry is dropped on insertion.
   */
  private static final Map<String, Pattern> patternCache =
      Collections.synchronizedMap(
          new LinkedHashMap<>(MAX_CACHE_SIZE + 1, 0.75f, true) {
            @Override
            protected boolean removeEldestEntry(Map.Entry<String, Pattern> eldest) {
              return size() > MAX_CACHE_SIZE;
            }
          });

  /** Static utility holder; prevents instantiation. */
  private RegexCommonUtils() {}

  /**
   * Get compiled pattern from cache or compile and cache it.
   *
   * <p>The lookup and the insertion are two separate synchronized operations, so concurrent
   * callers may each compile the same regex once; the later {@code put} simply overwrites the
   * earlier entry.
   *
   * @param regex The regex pattern string
   * @return Compiled Pattern object
   * @throws PatternSyntaxException if the regex is invalid
   */
  public static Pattern getCompiledPattern(String regex) {
    Pattern pattern = patternCache.get(regex);
    if (pattern == null) {
      // Compile outside the map's lock so a slow compilation does not block other lookups.
      pattern = Pattern.compile(regex);
      patternCache.put(regex, pattern);
    }
    return pattern;
  }

  /**
   * Extract list of named group candidates from a regex pattern.
   *
   * <p>The scan is purely textual: every occurrence of {@code (?<name>} in the pattern string
   * contributes {@code name}, in order of appearance.
   *
   * @param pattern The regex pattern string
   * @return Immutable list of named group names found in the pattern
   */
  public static List<String> getNamedGroupCandidates(String pattern) {
    ImmutableList.Builder<String> namedGroups = ImmutableList.builder();
    Matcher m = NAMED_GROUP_PATTERN.matcher(pattern);
    while (m.find()) {
      namedGroups.add(m.group(1));
    }
    return namedGroups.build();
  }

  /**
   * Match using find() for partial match semantics with string pattern.
   *
   * @param text The text to match against
   * @param patternStr The pattern string
   * @return true if pattern is found anywhere in the text; false if either argument is null
   * @throws PatternSyntaxException if the regex is invalid
   */
  public static boolean matchesPartial(String text, String patternStr) {
    if (text == null || patternStr == null) {
      return false;
    }
    Pattern pattern = getCompiledPattern(patternStr);
    return pattern.matcher(text).find();
  }

  /**
   * Extract a specific named group from text using the pattern. Used by parse command regex method.
   *
   * <p>Unlike {@link #matchesPartial(String, String)}, this requires the pattern to match the
   * <em>entire</em> text ({@code Matcher.matches()}), not just a substring of it.
   *
   * @param text The text to extract from
   * @param pattern The compiled pattern with named groups
   * @param groupName The name of the group to extract
   * @return The extracted value, or null if any argument is null, the pattern does not match the
   *     whole text, the pattern has no group with that name, or the group did not participate in
   *     the match
   */
  public static String extractNamedGroup(String text, Pattern pattern, String groupName) {
    if (text == null || pattern == null || groupName == null) {
      return null;
    }

    Matcher matcher = pattern.matcher(text);

    if (matcher.matches()) {
      try {
        return matcher.group(groupName);
      } catch (IllegalArgumentException e) {
        // Matcher.group(String) throws when the pattern defines no group with this name;
        // that case is deliberately reported as "not found" rather than as an error.
        return null;
      }
    }

    return null;
  }
}
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,6 @@

package org.opensearch.sql.expression.parse;

import com.google.common.collect.ImmutableList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import lombok.EqualsAndHashCode;
import lombok.Getter;
Expand All @@ -24,7 +21,6 @@
@ToString
public class RegexExpression extends ParseExpression {
private static final Logger log = LogManager.getLogger(RegexExpression.class);
private static final Pattern GROUP_PATTERN = Pattern.compile("\\(\\?<([a-zA-Z][a-zA-Z0-9]*)>");
@Getter @EqualsAndHashCode.Exclude private final Pattern regexPattern;

/**
Expand All @@ -36,32 +32,19 @@ public class RegexExpression extends ParseExpression {
*/
public RegexExpression(Expression sourceField, Expression pattern, Expression identifier) {
super("regex", sourceField, pattern, identifier);
this.regexPattern = Pattern.compile(pattern.valueOf().stringValue());
this.regexPattern = RegexCommonUtils.getCompiledPattern(pattern.valueOf().stringValue());
}

@Override
ExprValue parseValue(ExprValue value) throws ExpressionEvaluationException {
String rawString = value.stringValue();
Matcher matcher = regexPattern.matcher(rawString);
if (matcher.matches()) {
return new ExprStringValue(matcher.group(identifierStr));

String extracted = RegexCommonUtils.extractNamedGroup(rawString, regexPattern, identifierStr);

if (extracted != null) {
return new ExprStringValue(extracted);
}
log.debug("failed to extract pattern {} from input ***", regexPattern.pattern());
return new ExprStringValue("");
}

/**
* Get list of derived fields based on parse pattern.
*
* @param pattern pattern used for parsing
* @return list of names of the derived fields
*/
public static List<String> getNamedGroupCandidates(String pattern) {
ImmutableList.Builder<String> namedGroups = ImmutableList.builder();
Matcher m = GROUP_PATTERN.matcher(pattern);
while (m.find()) {
namedGroups.add(m.group(1));
}
return namedGroups.build();
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
import org.opensearch.sql.expression.parse.GrokExpression;
import org.opensearch.sql.expression.parse.ParseExpression;
import org.opensearch.sql.expression.parse.PatternsExpression;
import org.opensearch.sql.expression.parse.RegexCommonUtils;
import org.opensearch.sql.expression.parse.RegexExpression;

/** Utils for {@link ParseExpression}. */
Expand Down Expand Up @@ -57,7 +58,7 @@ public static List<String> getNamedGroupCandidates(
ParseMethod parseMethod, String pattern, Map<String, Literal> arguments) {
switch (parseMethod) {
case REGEX:
return RegexExpression.getNamedGroupCandidates(pattern);
return RegexCommonUtils.getNamedGroupCandidates(pattern);
case GROK:
return GrokExpression.getNamedGroupCandidates(pattern);
default:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1929,4 +1929,18 @@ public void brain_patterns_command() {

assertAnalyzeEqual(expectedPlan, patterns);
}

/**
 * Analyzing a {@code Regex} plan must fail with an {@link UnsupportedOperationException} whose
 * message states that the command is only available when Calcite is enabled.
 */
@Test
public void regex_command_throws_unsupported_exception_with_legacy_engine() {
  final org.opensearch.sql.ast.tree.Regex regexPlan =
      new org.opensearch.sql.ast.tree.Regex(
              field("lastname"), false, stringLiteral("^[A-Z][a-z]+$"))
          .attach(relation("schema"));

  final UnsupportedOperationException thrown =
      assertThrows(UnsupportedOperationException.class, () -> analyze(regexPlan));

  assertEquals(
      "Regex is supported only when plugins.calcite.enabled=true", thrown.getMessage());
}
}
Loading
Loading