Set analyzer to regex query string search (#3967) (#4220)

Sets analyzer to regex query string search.

Signed-off-by: yyyogev <yogev.metzuyanim@logz.io>
(cherry picked from commit ea4cfcc)
Signed-off-by: Daniel (dB.) Doubrovkine <dblock@amazon.com>
Signed-off-by: Daniel (dB.) Doubrovkine <dblock@amazon.com>
Co-authored-by: Yogev Mets <yyyogev@gmail.com>
opensearch-project · Sep 6, 2022 · 44c5eff · 44c5eff
1 parent 362d489
commit 44c5eff
Show file tree

Hide file tree

Showing 2 changed files with 32 additions and 7 deletions.
diff --git a/rest-api-spec/src/main/resources/rest-api-spec/test/search/190_index_prefix_search.yml b/rest-api-spec/src/main/resources/rest-api-spec/test/search/190_index_prefix_search.yml
@@ -21,6 +21,12 @@ setup:
           id:     1
           body:   { text: some short words with a stupendously long one }
 
+  - do:
+      index:
+        index:  test
+        id:     2
+        body:   { text: sentence with UPPERCASE WORDS }
+
   - do:
       indices.refresh:
         index: [test]
@@ -83,6 +89,22 @@ setup:
   - match: {hits.max_score: 1}
   - match: {hits.hits.0._score: 1}
 
+---
+"search with uppercase regex":
+  - do:
+      search:
+        rest_total_hits_as_int: true
+        index: test
+        body:
+          query:
+            query_string:
+              default_field: text
+              query: /UPPERCASE/
+
+  - match: {hits.total: 1}
+  - match: {hits.max_score: 1}
+  - match: {hits.hits.0._score: 1}
+
 ---
 "search index prefixes with span_multi":
   - skip:

diff --git a/server/src/main/java/org/opensearch/index/search/QueryStringQueryParser.java b/server/src/main/java/org/opensearch/index/search/QueryStringQueryParser.java
@@ -56,7 +56,6 @@
 import org.apache.lucene.search.spans.SpanOrQuery;
 import org.apache.lucene.search.spans.SpanQuery;
 import org.apache.lucene.util.BytesRef;
-import org.apache.lucene.util.automaton.RegExp;
 import org.opensearch.common.lucene.search.Queries;
 import org.opensearch.common.regex.Regex;
 import org.opensearch.common.unit.Fuzziness;
@@ -562,7 +561,7 @@ private Query getPrefixQuerySingle(String field, String termStr) throws ParseExc
             if (currentFieldType == null || currentFieldType.getTextSearchInfo() == TextSearchInfo.NONE) {
                 return newUnmappedFieldQuery(field);
             }
-            setAnalyzer(forceAnalyzer == null ? queryBuilder.context.getSearchAnalyzer(currentFieldType) : forceAnalyzer);
+            setAnalyzer(getSearchAnalyzer(currentFieldType));
             Query query = null;
             if (currentFieldType.getTextSearchInfo().isTokenized() == false) {
                 query = currentFieldType.prefixQuery(termStr, getMultiTermRewriteMethod(), context);
@@ -738,6 +737,13 @@ private Query getWildcardQuerySingle(String field, String termStr) throws ParseE
         }
     }
 
+    private Analyzer getSearchAnalyzer(MappedFieldType currentFieldType) { // resolves the analyzer to apply for the given field type
+        if (forceAnalyzer != null) { // an explicitly forced analyzer takes precedence
+            return forceAnalyzer;
+        }
+        return queryBuilder.context.getSearchAnalyzer(currentFieldType); // otherwise defer to the query context
+    }
+
     @Override
     protected Query getRegexpQuery(String field, String termStr) throws ParseException {
         final int maxAllowedRegexLength = context.getIndexSettings().getMaxRegexLength();
@@ -778,11 +784,8 @@ private Query getRegexpQuerySingle(String field, String termStr) throws ParseExc
             if (currentFieldType == null) {
                 return newUnmappedFieldQuery(field);
             }
-            if (forceAnalyzer != null) {
-                setAnalyzer(forceAnalyzer);
-                return super.getRegexpQuery(field, termStr);
-            }
-            return currentFieldType.regexpQuery(termStr, RegExp.ALL, 0, getDeterminizeWorkLimit(), getMultiTermRewriteMethod(), context);
+            setAnalyzer(getSearchAnalyzer(currentFieldType));
+            return super.getRegexpQuery(field, termStr);
         } catch (RuntimeException e) {
             if (lenient) {
                 return newLenientFieldQuery(field, e);