From 5b847971b5cf3d04484dcc42f6eb004bf7500c86 Mon Sep 17 00:00:00 2001 From: Ioana Tagirta Date: Tue, 16 Sep 2025 12:43:00 +0200 Subject: [PATCH 1/6] Query multiple indices with simplified RRF --- .../xpack/rank/RankRRFFeatures.java | 3 +- .../xpack/rank/rrf/RRFRetrieverBuilder.java | 10 +- .../rank/rrf/RRFRetrieverBuilderTests.java | 213 ++++++++++++++++++ .../test/rrf/310_rrf_retriever_simplified.yml | 173 +++++++++++--- 4 files changed, 355 insertions(+), 44 deletions(-) diff --git a/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/RankRRFFeatures.java b/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/RankRRFFeatures.java index 641cf049042bc..58e047e149309 100644 --- a/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/RankRRFFeatures.java +++ b/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/RankRRFFeatures.java @@ -40,7 +40,8 @@ public Set getTestFeatures() { RRFRetrieverBuilder.MULTI_FIELDS_QUERY_FORMAT_SUPPORT, RRFRetrieverBuilder.WEIGHTED_SUPPORT, LINEAR_RETRIEVER_TOP_LEVEL_NORMALIZER, - LinearRetrieverBuilder.MULTI_INDEX_SIMPLIFIED_FORMAT_SUPPORT + LinearRetrieverBuilder.MULTI_INDEX_SIMPLIFIED_FORMAT_SUPPORT, + RRFRetrieverBuilder.MULTI_INDEX_SIMPLIFIED_FORMAT_SUPPORT ); } } diff --git a/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/rrf/RRFRetrieverBuilder.java b/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/rrf/RRFRetrieverBuilder.java index 702bb0df0f9eb..7faad9917661d 100644 --- a/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/rrf/RRFRetrieverBuilder.java +++ b/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/rrf/RRFRetrieverBuilder.java @@ -50,7 +50,9 @@ public final class RRFRetrieverBuilder extends CompoundRetrieverBuilder { public static final NodeFeature MULTI_FIELDS_QUERY_FORMAT_SUPPORT = new NodeFeature("rrf_retriever.multi_fields_query_format_support"); public static final NodeFeature WEIGHTED_SUPPORT = 
new NodeFeature("rrf_retriever.weighted_support"); - + public static final NodeFeature MULTI_INDEX_SIMPLIFIED_FORMAT_SUPPORT = new NodeFeature( + "rrf_retriever.multi_index_simplified_format_support" + ); public static final String NAME = "rrf"; public static final ParseField RETRIEVERS_FIELD = new ParseField("retrievers"); @@ -253,11 +255,7 @@ protected RetrieverBuilder doRewrite(QueryRewriteContext ctx) { // TODO: Refactor duplicate code // Using the multi-fields query format var localIndicesMetadata = resolvedIndices.getConcreteLocalIndicesMetadata(); - if (localIndicesMetadata.size() > 1) { - throw new IllegalArgumentException( - "[" + NAME + "] cannot specify [" + QUERY_FIELD.getPreferredName() + "] when querying multiple indices" - ); - } else if (resolvedIndices.getRemoteClusterIndices().isEmpty() == false) { + if (resolvedIndices.getRemoteClusterIndices().isEmpty() == false) { throw new IllegalArgumentException( "[" + NAME + "] cannot specify [" + QUERY_FIELD.getPreferredName() + "] when querying remote indices" ); diff --git a/x-pack/plugin/rank-rrf/src/test/java/org/elasticsearch/xpack/rank/rrf/RRFRetrieverBuilderTests.java b/x-pack/plugin/rank-rrf/src/test/java/org/elasticsearch/xpack/rank/rrf/RRFRetrieverBuilderTests.java index 143293903de2d..97b3ab2013b88 100644 --- a/x-pack/plugin/rank-rrf/src/test/java/org/elasticsearch/xpack/rank/rrf/RRFRetrieverBuilderTests.java +++ b/x-pack/plugin/rank-rrf/src/test/java/org/elasticsearch/xpack/rank/rrf/RRFRetrieverBuilderTests.java @@ -11,6 +11,7 @@ import org.elasticsearch.action.ResolvedIndices; import org.elasticsearch.common.bytes.BytesArray; import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.core.Tuple; import org.elasticsearch.index.query.QueryRewriteContext; import org.elasticsearch.search.SearchModule; import org.elasticsearch.search.builder.PointInTimeBuilder; @@ -235,6 +236,218 @@ public void testMultiFieldsParamsRewrite() { ); } + public void 
testMultiIndexMultiFieldsParamsRewrite() { + String indexName = "test-index"; + String anotherIndexName = "test-another-index"; + final ResolvedIndices resolvedIndices = createMockResolvedIndices( + Map.of( + indexName, + List.of("semantic_field_1", "semantic_field_2"), + anotherIndexName, + List.of("semantic_field_2", "semantic_field_3") + ), + null, + Map.of() // use random and different inference IDs for semantic_text fields + ); + + final QueryRewriteContext queryRewriteContext = new QueryRewriteContext( + parserConfig(), + null, + null, + TransportVersion.current(), + RemoteClusterAware.LOCAL_CLUSTER_GROUP_KEY, + resolvedIndices, + new PointInTimeBuilder(new BytesArray("pitid")), + null, + null + ); + + // No wildcards, no per-field boosting + RRFRetrieverBuilder retriever = new RRFRetrieverBuilder( + null, + List.of("field_1", "field_2", "semantic_field_1", "semantic_field_2"), + "foo", + DEFAULT_RANK_WINDOW_SIZE, + RRFRetrieverBuilder.DEFAULT_RANK_CONSTANT, + new float[0] + ); + assertMultiIndexMultiFieldsParamsRewrite( + retriever, + queryRewriteContext, + Map.of( + Map.of("field_1", 1.0f, "field_2", 1.0f), + List.of(indexName), + Map.of("field_1", 1.0f, "field_2", 1.0f, "semantic_field_1", 1.0f), + List.of(anotherIndexName) + ), + Map.of( + new Tuple<>("semantic_field_1", List.of(indexName)), + 1.0f, + new Tuple<>("semantic_field_2", List.of(indexName)), // field with different inference IDs, we filter on index name + 1.0f, + new Tuple<>("semantic_field_2", List.of(anotherIndexName)), + 1.0f + ), + "foo", + null + ); + + // Non-default rank window size + retriever = new RRFRetrieverBuilder( + null, + List.of("field_1", "field_2", "semantic_field_1", "semantic_field_2"), + "foo2", + DEFAULT_RANK_WINDOW_SIZE * 2, + RRFRetrieverBuilder.DEFAULT_RANK_CONSTANT, + new float[0] + ); + assertMultiIndexMultiFieldsParamsRewrite( + retriever, + queryRewriteContext, + Map.of( + Map.of("field_1", 1.0f, "field_2", 1.0f), + List.of(indexName), + Map.of("field_1", 1.0f, 
"field_2", 1.0f, "semantic_field_1", 1.0f), + List.of(anotherIndexName) + ), + Map.of( + new Tuple<>("semantic_field_1", List.of(indexName)), + 1.0f, + new Tuple<>("semantic_field_2", List.of(indexName)), + 1.0f, + new Tuple<>("semantic_field_2", List.of(anotherIndexName)), + 1.0f + ), + "foo2", + null + ); + + // All-fields wildcard + retriever = new RRFRetrieverBuilder( + null, + List.of("*"), + "qux", + DEFAULT_RANK_WINDOW_SIZE, + RRFRetrieverBuilder.DEFAULT_RANK_CONSTANT, + new float[0] + ); + assertMultiIndexMultiFieldsParamsRewrite( + retriever, + queryRewriteContext, + Map.of(Map.of("*", 1.0f), List.of()), // no index filter for the lexical retriever + Map.of( + new Tuple<>("semantic_field_1", List.of(indexName)), + 1.0f, + new Tuple<>("semantic_field_2", List.of(indexName)), + 1.0f, + new Tuple<>("semantic_field_2", List.of(anotherIndexName)), + 1.0f, + new Tuple<>("semantic_field_3", List.of(anotherIndexName)), + 1.0f + ), + "qux", + null + ); + } + + public void testMultiIndexMultiFieldsParamsRewriteWithSameInferenceIds() { + String indexName = "test-index"; + String anotherIndexName = "test-another-index"; + final ResolvedIndices resolvedIndices = createMockResolvedIndices( + Map.of( + indexName, + List.of("semantic_field_1", "semantic_field_2"), + anotherIndexName, + List.of("semantic_field_2", "semantic_field_3") + ), + null, + Map.of("semantic_field_2", "common_inference_id") // use the same inference ID for semantic_field_2 + ); + + final QueryRewriteContext queryRewriteContext = new QueryRewriteContext( + parserConfig(), + null, + null, + TransportVersion.current(), + RemoteClusterAware.LOCAL_CLUSTER_GROUP_KEY, + resolvedIndices, + new PointInTimeBuilder(new BytesArray("pitid")), + null, + null + ); + + // No wildcards, no per-field boosting + RRFRetrieverBuilder retriever = new RRFRetrieverBuilder( + null, + List.of("field_1", "field_2", "semantic_field_1", "semantic_field_2"), + "foo", + DEFAULT_RANK_WINDOW_SIZE, + 
RRFRetrieverBuilder.DEFAULT_RANK_CONSTANT, + new float[0] + ); + assertMultiIndexMultiFieldsParamsRewrite( + retriever, + queryRewriteContext, + Map.of( + Map.of("field_1", 1.0f, "field_2", 1.0f), + List.of(indexName), + Map.of("field_1", 1.0f, "field_2", 1.0f, "semantic_field_1", 1.0f), + List.of(anotherIndexName) + ), + Map.of(new Tuple<>("semantic_field_1", List.of(indexName)), 1.0f, new Tuple<>("semantic_field_2", List.of()), 1.0f), + "foo", + null + ); + + // Non-default rank window size + retriever = new RRFRetrieverBuilder( + null, + List.of("field_1", "field_2", "semantic_field_1", "semantic_field_2"), + "foo2", + DEFAULT_RANK_WINDOW_SIZE * 2, + RRFRetrieverBuilder.DEFAULT_RANK_CONSTANT, + new float[0] + ); + assertMultiIndexMultiFieldsParamsRewrite( + retriever, + queryRewriteContext, + Map.of( + Map.of("field_1", 1.0f, "field_2", 1.0f), + List.of(indexName), + Map.of("field_1", 1.0f, "field_2", 1.0f, "semantic_field_1", 1.0f), + List.of(anotherIndexName) + ), + Map.of(new Tuple<>("semantic_field_1", List.of(indexName)), 1.0f, new Tuple<>("semantic_field_2", List.of()), 1.0f), + "foo2", + null + ); + + // All-fields wildcard + retriever = new RRFRetrieverBuilder( + null, + List.of("*"), + "qux", + DEFAULT_RANK_WINDOW_SIZE, + RRFRetrieverBuilder.DEFAULT_RANK_CONSTANT, + new float[0] + ); + assertMultiIndexMultiFieldsParamsRewrite( + retriever, + queryRewriteContext, + Map.of(Map.of("*", 1.0f), List.of()), // no index filter on the lexical query + Map.of( + new Tuple<>("semantic_field_1", List.of(indexName)), + 1.0f, + new Tuple<>("semantic_field_2", List.of()), // no index filter since both indices have this field + 1.0f, + new Tuple<>("semantic_field_3", List.of(anotherIndexName)), + 1.0f + ), + "qux", + null + ); + } + public void testSearchRemoteIndex() { final ResolvedIndices resolvedIndices = createMockResolvedIndices( Map.of("local-index", List.of()), diff --git 
a/x-pack/plugin/rank-rrf/src/yamlRestTest/resources/rest-api-spec/test/rrf/310_rrf_retriever_simplified.yml b/x-pack/plugin/rank-rrf/src/yamlRestTest/resources/rest-api-spec/test/rrf/310_rrf_retriever_simplified.yml index cd03280691929..2ae6ad778e030 100644 --- a/x-pack/plugin/rank-rrf/src/yamlRestTest/resources/rest-api-spec/test/rrf/310_rrf_retriever_simplified.yml +++ b/x-pack/plugin/rank-rrf/src/yamlRestTest/resources/rest-api-spec/test/rrf/310_rrf_retriever_simplified.yml @@ -35,6 +35,36 @@ setup: "task_settings": { } } + - do: + inference.put: + task_type: sparse_embedding + inference_id: sparse-inference-another-id + body: > + { + "service": "test_service", + "service_settings": { + "model": "my_model", + "api_key": "abc64" + }, + "task_settings": { + } + } + - do: + inference.put: + task_type: text_embedding + inference_id: dense-inference-another-id + body: > + { + "service": "text_embedding_test_service", + "service_settings": { + "model": "my_model", + "dimensions": 128, + "similarity": "cosine", + "api_key": "xyz" + }, + "task_settings": { + } + } - do: indices.create: @@ -110,6 +140,66 @@ setup: "baz": 1.0 } } + - do: + indices.create: + index: test-another-index + body: + mappings: + properties: + keyword: + type: keyword + dense_inference: + type: semantic_text + inference_id: dense-inference-id + sparse_inference: + type: semantic_text + inference_id: sparse-inference-another-id + text_1: + type: semantic_text + inference_id: dense-inference-another-id + text_2: + type: text + sparse_vector: + type: sparse_vector + + - do: + bulk: + index: test-another-index + refresh: true + body: | + {"index": {"_id": "4"}} + { + "keyword": "keyword match 1", + "dense_inference": "You know", + "sparse_inference": "For Search", + "text_1": "Be excellent to each other", + "text_2": "xyz match match 2", + "sparse_vector": { + "foo": 1.0 + } + } + {"index": {"_id": "5"}} + { + "keyword": "keyword match 2", + "dense_inference": "Elasticsearch is simply the best", + 
"sparse_inference": "Better than all the rest", + "text_1": "Better than another vector database", + "text_2": "yy match match 4", + "sparse_vector": { + "bar": 1.0 + } + } + {"index": {"_id": "6"}} + { + "keyword": "keyword match 3", + "dense_inference": "Elasticsearch is the best vector database", + "sparse_inference": "Live long and prosper", + "text_1": "Most excellent", + "text_2": "z match match 6", + "sparse_vector": { + "baz": 1.0 + } + } --- "Query all fields using the simplified format": @@ -265,43 +355,6 @@ setup: - match: { hits.total.value: 0 } - length: { hits.hits: 0 } ---- -"Multi-index searches are not allowed": - - do: - indices.create: - index: test-index-2 - - - do: - catch: bad_request - search: - index: [ test-index, test-index-2 ] - body: - retriever: - rrf: - query: "match" - - - match: { error.root_cause.0.reason: "[rrf] cannot specify [query] when querying multiple indices" } - - - do: - indices.put_alias: - index: test-index - name: test-alias - - do: - indices.put_alias: - index: test-index-2 - name: test-alias - - - do: - catch: bad_request - search: - index: test-alias - body: - retriever: - rrf: - query: "match" - - - match: { error.root_cause.0.reason: "[rrf] cannot specify [query] when querying multiple indices" } - --- "Wildcard field patterns that do not resolve to any field are handled gracefully": - do: @@ -429,3 +482,49 @@ setup: - match: { hits.hits.0._id: "2" } - match: { hits.hits.1._id: "1" } - match: { hits.hits.2._id: "3" } + + +--- +"Queries multiple indices using default_field": + - requires: + cluster_features: [ "rrf_retriever.multi_index_simplified_format_support" ] + reason: "Support for querying multiple indices in simplified RRF retriever" + - do: + search: + index: test-index,test-another-index + body: + retriever: + rrf: + query: "match" + + - match: { hits.total.value: 6 } + - length: { hits.hits: 6 } + - match: { hits.hits.0._id: "4" } + - match: { hits.hits.1._id: "6" } + - match: { hits.hits.2._id: "1" } + - 
match: { hits.hits.3._id: "2" } + - match: { hits.hits.4._id: "3" } + - match: { hits.hits.5._id: "5" } + +--- +"Queries multiple indices with a list of fields": + - requires: + cluster_features: [ "rrf_retriever.multi_index_simplified_format_support" ] + reason: "Support for querying multiple indices in simplified RRF retriever" + - do: + search: + index: test-index,test-another-index + body: + retriever: + rrf: + fields: ["dense_inference", "text_1", "sparse_vector"] + query: "match" + + - match: { hits.total.value: 6 } + - length: { hits.hits: 6 } + - match: { hits.hits.0._id: "1" } + - match: { hits.hits.1._id: "2" } + - match: { hits.hits.2._id: "3" } + - match: { hits.hits.3._id: "6" } + - match: { hits.hits.4._id: "4" } + - match: { hits.hits.5._id: "5" } From 83e66d735f9943ef517bf8ea6642c47f0aef690f Mon Sep 17 00:00:00 2001 From: Ioana Tagirta Date: Tue, 16 Sep 2025 18:32:27 +0200 Subject: [PATCH 2/6] Update docs/changelog/134822.yaml --- docs/changelog/134822.yaml | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 docs/changelog/134822.yaml diff --git a/docs/changelog/134822.yaml b/docs/changelog/134822.yaml new file mode 100644 index 0000000000000..65f0e89e22324 --- /dev/null +++ b/docs/changelog/134822.yaml @@ -0,0 +1,5 @@ +pr: 134822 +summary: Query multiple indices with simplified RRF +area: Relevance +type: enhancement +issues: [] From 035b25bfd3f7cd664a1e91f590cf87b87d923762 Mon Sep 17 00:00:00 2001 From: elasticsearchmachine Date: Tue, 16 Sep 2025 16:39:07 +0000 Subject: [PATCH 3/6] [CI] Auto commit changes from spotless --- .../elasticsearch/xpack/rank/rrf/RRFRetrieverBuilderTests.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/x-pack/plugin/rank-rrf/src/test/java/org/elasticsearch/xpack/rank/rrf/RRFRetrieverBuilderTests.java b/x-pack/plugin/rank-rrf/src/test/java/org/elasticsearch/xpack/rank/rrf/RRFRetrieverBuilderTests.java index 97b3ab2013b88..81994992b3249 100644 --- 
a/x-pack/plugin/rank-rrf/src/test/java/org/elasticsearch/xpack/rank/rrf/RRFRetrieverBuilderTests.java +++ b/x-pack/plugin/rank-rrf/src/test/java/org/elasticsearch/xpack/rank/rrf/RRFRetrieverBuilderTests.java @@ -236,7 +236,7 @@ public void testMultiFieldsParamsRewrite() { ); } - public void testMultiIndexMultiFieldsParamsRewrite() { + public void testMultiIndexMultiFieldsParamsRewrite() { String indexName = "test-index"; String anotherIndexName = "test-another-index"; final ResolvedIndices resolvedIndices = createMockResolvedIndices( From 633efca75fdb0ba830e9d02299530981b55b169b Mon Sep 17 00:00:00 2001 From: Ioana Tagirta Date: Tue, 16 Sep 2025 20:28:43 +0200 Subject: [PATCH 4/6] Update changelog entry --- docs/changelog/134822.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/changelog/134822.yaml b/docs/changelog/134822.yaml index 65f0e89e22324..7628a86c023c7 100644 --- a/docs/changelog/134822.yaml +++ b/docs/changelog/134822.yaml @@ -1,5 +1,5 @@ pr: 134822 -summary: Query multiple indices with simplified RRF +summary: Support querying multiple indices with the simplified RRF retriever area: Relevance type: enhancement issues: [] From 83f73d44f7d2a961586ea49ae1918e9c6580976f Mon Sep 17 00:00:00 2001 From: Ioana Tagirta Date: Tue, 16 Sep 2025 20:44:53 +0200 Subject: [PATCH 5/6] Add test for wildcard patterns --- .../rank/rrf/RRFRetrieverBuilderTests.java | 27 +++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/x-pack/plugin/rank-rrf/src/test/java/org/elasticsearch/xpack/rank/rrf/RRFRetrieverBuilderTests.java b/x-pack/plugin/rank-rrf/src/test/java/org/elasticsearch/xpack/rank/rrf/RRFRetrieverBuilderTests.java index 81994992b3249..959847ba344b2 100644 --- a/x-pack/plugin/rank-rrf/src/test/java/org/elasticsearch/xpack/rank/rrf/RRFRetrieverBuilderTests.java +++ b/x-pack/plugin/rank-rrf/src/test/java/org/elasticsearch/xpack/rank/rrf/RRFRetrieverBuilderTests.java @@ -292,6 +292,33 @@ public void 
testMultiIndexMultiFieldsParamsRewrite() { null ); + // Glob matching on inference and non-inference fields + retriever = new RRFRetrieverBuilder( + null, + List.of("field_*", "field_1", "*_field_1", "semantic_*"), + "baz2", + DEFAULT_RANK_WINDOW_SIZE, + RRFRetrieverBuilder.DEFAULT_RANK_CONSTANT, + new float[0] + ); + assertMultiIndexMultiFieldsParamsRewrite( + retriever, + queryRewriteContext, + Map.of(Map.of("field_*", 1.0f, "field_1", 1.0f, "*_field_1", 1.0f, "semantic_*", 1.0f), List.of()), + Map.of( + new Tuple<>("semantic_field_1", List.of(indexName)), + 1.0f, + new Tuple<>("semantic_field_2", List.of(indexName)), + 1.0f, + new Tuple<>("semantic_field_2", List.of(anotherIndexName)), + 1.0f, + new Tuple<>("semantic_field_3", List.of(anotherIndexName)), + 1.0f + ), + "baz2", + null + ); + // Non-default rank window size retriever = new RRFRetrieverBuilder( null, From c7977c9ee68fa3b8a76d3858aa4875e2f4ed8546 Mon Sep 17 00:00:00 2001 From: Ioana Tagirta Date: Tue, 16 Sep 2025 20:49:43 +0200 Subject: [PATCH 6/6] And another test --- .../rank/rrf/RRFRetrieverBuilderTests.java | 25 +++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/x-pack/plugin/rank-rrf/src/test/java/org/elasticsearch/xpack/rank/rrf/RRFRetrieverBuilderTests.java b/x-pack/plugin/rank-rrf/src/test/java/org/elasticsearch/xpack/rank/rrf/RRFRetrieverBuilderTests.java index 959847ba344b2..6a0ad75cc721a 100644 --- a/x-pack/plugin/rank-rrf/src/test/java/org/elasticsearch/xpack/rank/rrf/RRFRetrieverBuilderTests.java +++ b/x-pack/plugin/rank-rrf/src/test/java/org/elasticsearch/xpack/rank/rrf/RRFRetrieverBuilderTests.java @@ -449,6 +449,31 @@ public void testMultiIndexMultiFieldsParamsRewriteWithSameInferenceIds() { null ); + // Glob matching on inference and non-inference fields + retriever = new RRFRetrieverBuilder( + null, + List.of("field_*", "field_1", "*_field_1", "semantic_*"), + "baz2", + DEFAULT_RANK_WINDOW_SIZE, + RRFRetrieverBuilder.DEFAULT_RANK_CONSTANT, + new float[0] + ); + 
assertMultiIndexMultiFieldsParamsRewrite( + retriever, + queryRewriteContext, + Map.of(Map.of("field_*", 1.0f, "field_1", 1.0f, "*_field_1", 1.0f, "semantic_*", 1.0f), List.of()), + Map.of( + new Tuple<>("semantic_field_1", List.of(indexName)), + 1.0f, + new Tuple<>("semantic_field_2", List.of()), + 1.0f, + new Tuple<>("semantic_field_3", List.of(anotherIndexName)), + 1.0f + ), + "baz2", + null + ); + // All-fields wildcard retriever = new RRFRetrieverBuilder( null,