From 996884067d44d3b2a8fb6822ca043abc2da6ff78 Mon Sep 17 00:00:00 2001 From: "opensearch-trigger-bot[bot]" <98922864+opensearch-trigger-bot[bot]@users.noreply.github.com> Date: Wed, 12 Oct 2022 19:01:50 -0500 Subject: [PATCH] [Backport 2.x] Set analyzer to regex query string search (#4219) Sets analyzer to regex query string search Signed-off-by: yogev mets (cherry picked from commit ea4cfcc1f1a4788982e8f7bb3175b0903226ca48) Co-authored-by: Yogev Mets Co-authored-by: Nicholas Walter Knize --- CHANGELOG.md | 1 + .../test/search/190_index_prefix_search.yml | 22 +++++++++++++++++++ .../index/search/QueryStringQueryParser.java | 17 ++++++++------ 3 files changed, 33 insertions(+), 7 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index a21b1277155d2..d948d1bc0e27f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -76,6 +76,7 @@ Inspired from [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) - Updated jackson to 2.13.4 and snakeyml to 1.32 ([#4563](https://github.com/opensearch-project/OpenSearch/pull/4563)) - Add BWC version 1.3.7 ([#4709](https://github.com/opensearch-project/OpenSearch/pull/4709)) - Bump `jettison` from 1.4.1 to 1.5.1 ([#4717](https://github.com/opensearch-project/OpenSearch/pull/4717)) +- Set analyzer to regex query string search ([4219](https://github.com/opensearch-project/OpenSearch/pull/4219)) ### Changed diff --git a/rest-api-spec/src/main/resources/rest-api-spec/test/search/190_index_prefix_search.yml b/rest-api-spec/src/main/resources/rest-api-spec/test/search/190_index_prefix_search.yml index 6f276f669f815..ca8e89aa2d5b5 100644 --- a/rest-api-spec/src/main/resources/rest-api-spec/test/search/190_index_prefix_search.yml +++ b/rest-api-spec/src/main/resources/rest-api-spec/test/search/190_index_prefix_search.yml @@ -17,6 +17,12 @@ setup: id: 1 body: { text: some short words with a stupendously long one } + - do: + index: + index: test + id: 2 + body: { text: sentence with UPPERCASE WORDS } + - do: indices.refresh: index: [test] @@ -76,6 +82,22 @@ setup: - match: {hits.max_score: 1} - match: {hits.hits.0._score: 1} +--- +"search with uppercase regex": + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + query_string: + default_field: text + query: /UPPERCASE/ + + - match: {hits.total: 1} + - match: {hits.max_score: 1} + - match: {hits.hits.0._score: 1} + --- "search index prefixes with span_multi": - do: diff --git a/server/src/main/java/org/opensearch/index/search/QueryStringQueryParser.java b/server/src/main/java/org/opensearch/index/search/QueryStringQueryParser.java index cdb7464ff250a..6d59e861eb32f 100644 --- a/server/src/main/java/org/opensearch/index/search/QueryStringQueryParser.java +++ b/server/src/main/java/org/opensearch/index/search/QueryStringQueryParser.java @@ -56,7 +56,6 @@ import org.apache.lucene.search.SynonymQuery; import org.apache.lucene.search.WildcardQuery; import org.apache.lucene.util.BytesRef; -import org.apache.lucene.util.automaton.RegExp; import org.opensearch.common.lucene.search.Queries; import org.opensearch.common.regex.Regex; import org.opensearch.common.unit.Fuzziness; @@ -565,7 +564,7 @@ private Query getPrefixQuerySingle(String field, String termStr) throws ParseExc if (currentFieldType == null || currentFieldType.getTextSearchInfo() == TextSearchInfo.NONE) { return newUnmappedFieldQuery(field); } - setAnalyzer(forceAnalyzer == null ? queryBuilder.context.getSearchAnalyzer(currentFieldType) : forceAnalyzer); + setAnalyzer(getSearchAnalyzer(currentFieldType)); Query query = null; if (currentFieldType.getTextSearchInfo().isTokenized() == false) { query = currentFieldType.prefixQuery(termStr, getMultiTermRewriteMethod(), context); @@ -741,6 +740,13 @@ private Query getWildcardQuerySingle(String field, String termStr) throws ParseE } } + private Analyzer getSearchAnalyzer(MappedFieldType currentFieldType) { + if (forceAnalyzer == null) { + return queryBuilder.context.getSearchAnalyzer(currentFieldType); + } + return forceAnalyzer; + } + @Override protected Query getRegexpQuery(String field, String termStr) throws ParseException { final int maxAllowedRegexLength = context.getIndexSettings().getMaxRegexLength(); @@ -781,11 +787,8 @@ private Query getRegexpQuerySingle(String field, String termStr) throws ParseExc if (currentFieldType == null) { return newUnmappedFieldQuery(field); } - if (forceAnalyzer != null) { - setAnalyzer(forceAnalyzer); - return super.getRegexpQuery(field, termStr); - } - return currentFieldType.regexpQuery(termStr, RegExp.ALL, 0, getDeterminizeWorkLimit(), getMultiTermRewriteMethod(), context); + setAnalyzer(getSearchAnalyzer(currentFieldType)); + return super.getRegexpQuery(field, termStr); } catch (RuntimeException e) { if (lenient) { return newLenientFieldQuery(field, e);