Patch summary (free text placed ahead of the first "diff --git" header).

Adds a CHANGELOG entry, makes AutomatonQueries.caseInsensitiveWildcardQuery determinize the
wildcard automaton with Operations.DEFAULT_DETERMINIZE_WORK_LIMIT before building the query
(a TooComplexToDeterminizeException is rethrown as a RuntimeException that names the term and
keeps the original exception as its cause), and adds two tests plus two pattern-building
helpers to TextFieldTypeTests.

NOTE(review): in testCaseInsensitiveWildcardQueryTooComplexToDeterminize the "return" inside the
catch block ends the test at the first pattern that throws, so later patterns are not exercised
in that run; the test also passes when no pattern throws. Confirm this is intended.
NOTE(review): the failure is wrapped in a plain RuntimeException; confirm callers do not need a
more specific type.
NOTE(review): indentation inside the hunks was rebuilt assuming 4-space Java indents; verify
against the target files before applying.

diff --git a/CHANGELOG.md b/CHANGELOG.md
index e69ea261c0c49..7a079d44791f7 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -17,6 +17,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 ### Fixed
 - Fix Allocation and Rebalance Constraints of WeightFunction are incorrectly reset ([#19012](https://github.com/opensearch-project/OpenSearch/pull/19012))
 - Fix flaky test FieldDataLoadingIT.testIndicesFieldDataCacheSizeSetting ([#19571](https://github.com/opensearch-project/OpenSearch/pull/19571))
+- Fix case-insensitive wildcard + aggregation query crash ([#19489](https://github.com/opensearch-project/OpenSearch/pull/19489))
 - Avoid primary shard failure caused by merged segment warmer exceptions ([#19436](https://github.com/opensearch-project/OpenSearch/pull/19436))
 
 ### Dependencies
diff --git a/server/src/main/java/org/opensearch/common/lucene/search/AutomatonQueries.java b/server/src/main/java/org/opensearch/common/lucene/search/AutomatonQueries.java
index fa1d88a3a6537..2b45d961ddc87 100644
--- a/server/src/main/java/org/opensearch/common/lucene/search/AutomatonQueries.java
+++ b/server/src/main/java/org/opensearch/common/lucene/search/AutomatonQueries.java
@@ -39,6 +39,7 @@
 import org.apache.lucene.util.automaton.Automata;
 import org.apache.lucene.util.automaton.Automaton;
 import org.apache.lucene.util.automaton.Operations;
+import org.apache.lucene.util.automaton.TooComplexToDeterminizeException;
 
 import java.util.ArrayList;
 import java.util.Arrays;
@@ -92,7 +93,14 @@ public static AutomatonQuery caseInsensitiveTermQuery(Term term) {
      * Build an automaton matching a wildcard pattern, ASCII case insensitive, if the method is null, then will use {@link MultiTermQuery#CONSTANT_SCORE_BLENDED_REWRITE}.
      */
     public static AutomatonQuery caseInsensitiveWildcardQuery(Term wildcardquery, MultiTermQuery.RewriteMethod method) {
-        return createAutomatonQuery(wildcardquery, toCaseInsensitiveWildcardAutomaton(wildcardquery), method);
+        Automaton automaton = toCaseInsensitiveWildcardAutomaton(wildcardquery);
+        try {
+            automaton = Operations.determinize(automaton, Operations.DEFAULT_DETERMINIZE_WORK_LIMIT);
+        } catch (TooComplexToDeterminizeException e) {
+            throw new RuntimeException("Wildcard query too complex to determinize for term: " + wildcardquery, e);
+        }
+        assert automaton.isDeterministic();
+        return createAutomatonQuery(wildcardquery, automaton, method);
     }
 
     /**
diff --git a/server/src/test/java/org/opensearch/index/mapper/TextFieldTypeTests.java b/server/src/test/java/org/opensearch/index/mapper/TextFieldTypeTests.java
index 130821df8dd56..9653737c008c8 100644
--- a/server/src/test/java/org/opensearch/index/mapper/TextFieldTypeTests.java
+++ b/server/src/test/java/org/opensearch/index/mapper/TextFieldTypeTests.java
@@ -48,6 +48,7 @@
 import org.apache.lucene.util.automaton.Automata;
 import org.apache.lucene.util.automaton.Automaton;
 import org.apache.lucene.util.automaton.Operations;
+import org.apache.lucene.util.automaton.TooComplexToDeterminizeException;
 import org.opensearch.OpenSearchException;
 import org.opensearch.common.lucene.BytesRefs;
 import org.opensearch.common.lucene.Lucene;
@@ -61,7 +62,9 @@
 import java.util.Collections;
 import java.util.List;
 
+import static org.hamcrest.Matchers.containsString;
 import static org.hamcrest.Matchers.equalTo;
+import static org.hamcrest.Matchers.instanceOf;
 import static org.apache.lucene.search.MultiTermQuery.CONSTANT_SCORE_BLENDED_REWRITE;
 import static org.apache.lucene.search.MultiTermQuery.CONSTANT_SCORE_REWRITE;
 
@@ -231,6 +234,56 @@ public void testIndexPrefixes() {
         assertThat(q, equalTo(expected));
     }
 
+    public void testCaseInsensitiveWildcardQueryDeterminization() {
+        Term wildcardTerm = new Term("field", "test*");
+        Query result = AutomatonQueries.caseInsensitiveWildcardQuery(wildcardTerm, null);
+
+        assertNotNull(result);
+        assertTrue(((AutomatonQuery) result).getAutomaton().isDeterministic());
+    }
+
+    private String createComplexPattern(int repetitions, String basePattern) {
+        StringBuilder pattern = new StringBuilder();
+        for (int i = 0; i < repetitions; i++) {
+            pattern.append(basePattern);
+        }
+        return pattern.toString();
+    }
+
+    private String createExponentialPattern(int depth) {
+        StringBuilder pattern = new StringBuilder();
+        for (int i = 0; i < depth; i++) {
+            pattern.append("(");
+            for (int j = 0; j < 5; j++) {
+                pattern.append((char) ('a' + (i * 5 + j) % 26)).append("*");
+            }
+            pattern.append(")*");
+        }
+        return pattern.toString();
+    }
+
+    public void testCaseInsensitiveWildcardQueryTooComplexToDeterminize() {
+        String[] complexPatterns = {
+            createComplexPattern(200, "a*b*c*d*e*f*g*h*i*j*"),
+            createComplexPattern(150, "*[a-z]*[A-Z]*[0-9]*"),
+            createExponentialPattern(10) };
+
+        for (String pattern : complexPatterns) {
+            Term complexTerm = new Term("field", pattern);
+
+            try {
+                AutomatonQuery result = AutomatonQueries.caseInsensitiveWildcardQuery(complexTerm, null);
+                assertNotNull(result);
+                assertTrue(result.getAutomaton().isDeterministic());
+            } catch (RuntimeException e) {
+                assertThat(e.getCause(), instanceOf(TooComplexToDeterminizeException.class));
+                assertThat(e.getMessage(), containsString("Wildcard query too complex to determinize for term:"));
+                assertThat(e.getMessage(), containsString(complexTerm.toString()));
+                return;
+            }
+        }
+    }
+
     public void testFetchSourceValue() throws IOException {
         TextFieldType fieldType = createFieldType(true);
         fieldType.setIndexAnalyzer(Lucene.STANDARD_ANALYZER);