From df2c607c72f83fae2801568618b1363b1f33aef6 Mon Sep 17 00:00:00 2001 From: Mike Pellegrini Date: Thu, 18 Sep 2025 16:28:36 -0400 Subject: [PATCH 1/2] Aligned RRF and linear retriever unit tests --- .../linear/LinearRetrieverBuilderTests.java | 19 ++ .../rank/rrf/RRFRetrieverBuilderTests.java | 276 +++++++++--------- 2 files changed, 164 insertions(+), 131 deletions(-) diff --git a/x-pack/plugin/rank-rrf/src/test/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverBuilderTests.java b/x-pack/plugin/rank-rrf/src/test/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverBuilderTests.java index 170265ec0723d..bcf7a2914d9b1 100644 --- a/x-pack/plugin/rank-rrf/src/test/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverBuilderTests.java +++ b/x-pack/plugin/rank-rrf/src/test/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverBuilderTests.java @@ -101,6 +101,25 @@ public void testMultiFieldsParamsRewrite() { MinMaxScoreNormalizer.INSTANCE ); + // Zero weights + retriever = new LinearRetrieverBuilder( + null, + List.of("field_1^0", "field_2^1.0"), + "zero_test", + MinMaxScoreNormalizer.INSTANCE, + DEFAULT_RANK_WINDOW_SIZE, + new float[0], + new ScoreNormalizer[0] + ); + assertMultiFieldsParamsRewrite( + retriever, + queryRewriteContext, + Map.of("field_1", 0.0f, "field_2", 1.0f), + Map.of(), + "zero_test", + MinMaxScoreNormalizer.INSTANCE + ); + // Glob matching on inference and non-inference fields with per-field boosting retriever = new LinearRetrieverBuilder( null, diff --git a/x-pack/plugin/rank-rrf/src/test/java/org/elasticsearch/xpack/rank/rrf/RRFRetrieverBuilderTests.java b/x-pack/plugin/rank-rrf/src/test/java/org/elasticsearch/xpack/rank/rrf/RRFRetrieverBuilderTests.java index 402303ef96d18..1bb69ea269a4e 100644 --- a/x-pack/plugin/rank-rrf/src/test/java/org/elasticsearch/xpack/rank/rrf/RRFRetrieverBuilderTests.java +++ b/x-pack/plugin/rank-rrf/src/test/java/org/elasticsearch/xpack/rank/rrf/RRFRetrieverBuilderTests.java @@ -167,7 +167,7 @@ public 
void testMultiFieldsParamsRewrite() { null ); - // No wildcards + // No wildcards, no per-field boosting RRFRetrieverBuilder rrfRetrieverBuilder = new RRFRetrieverBuilder( null, List.of("field_1", "field_2", "semantic_field_1", "semantic_field_2"), @@ -200,27 +200,9 @@ public void testMultiFieldsParamsRewrite() { Map.of("semantic_field_1", 1.0f, "semantic_field_2", 1.0f), "foo2" ); - } - public void testMultiFieldsParamsRewriteWithWeights() { - final String indexName = "test-index"; - final List testInferenceFields = List.of("semantic_field_1", "semantic_field_2"); - final ResolvedIndices resolvedIndices = createMockResolvedIndices(Map.of(indexName, testInferenceFields), null, Map.of()); - final QueryRewriteContext queryRewriteContext = new QueryRewriteContext( - parserConfig(), - null, - null, - TransportVersion.current(), - RemoteClusterAware.LOCAL_CLUSTER_GROUP_KEY, - resolvedIndices, - new PointInTimeBuilder(new BytesArray("pitid")), - null, - null, - false - ); - - // Simple per-field boosting - RRFRetrieverBuilder rrfRetrieverBuilder = new RRFRetrieverBuilder( + // No wildcards, per-field boosting + rrfRetrieverBuilder = new RRFRetrieverBuilder( null, List.of("field_1", "field_2^1.5", "semantic_field_1", "semantic_field_2^2"), "bar", @@ -233,65 +215,10 @@ public void testMultiFieldsParamsRewriteWithWeights() { queryRewriteContext, Map.of("field_1", 1.0f, "field_2", 1.5f), Map.of("semantic_field_1", 1.0f, "semantic_field_2", 2.0f), - "bar", - null - ); - - // Glob matching on inference and non-inference fields with per-field boosting - rrfRetrieverBuilder = new RRFRetrieverBuilder( - null, - List.of("field_*^1.5", "*_field_1^2.5"), - "baz", - DEFAULT_RANK_WINDOW_SIZE, - RRFRetrieverBuilder.DEFAULT_RANK_CONSTANT, - new float[0] - ); - assertMultiFieldsParamsRewrite( - rrfRetrieverBuilder, - queryRewriteContext, - Map.of("field_*", 1.5f, "*_field_1", 2.5f), - Map.of("semantic_field_1", 2.5f), - "baz", - null + "bar" ); - // Multiple boosts defined on the same 
field - rrfRetrieverBuilder = new RRFRetrieverBuilder( - null, - List.of("field_*^1.5", "field_1^3.0", "*_field_1^2.5", "semantic_*^1.5"), - "baz2", - DEFAULT_RANK_WINDOW_SIZE, - RRFRetrieverBuilder.DEFAULT_RANK_CONSTANT, - new float[0] - ); - assertMultiFieldsParamsRewrite( - rrfRetrieverBuilder, - queryRewriteContext, - Map.of("field_*", 1.5f, "field_1", 3.0f, "*_field_1", 2.5f, "semantic_*", 1.5f), - Map.of("semantic_field_1", 3.75f, "semantic_field_2", 1.5f), - "baz2", - null - ); - - // All-fields wildcard with weights - rrfRetrieverBuilder = new RRFRetrieverBuilder( - null, - List.of("*^2.0"), - "qux", - DEFAULT_RANK_WINDOW_SIZE, - RRFRetrieverBuilder.DEFAULT_RANK_CONSTANT, - new float[0] - ); - assertMultiFieldsParamsRewrite( - rrfRetrieverBuilder, - queryRewriteContext, - Map.of("*", 2.0f), - Map.of("semantic_field_1", 2.0f, "semantic_field_2", 2.0f), - "qux", - null - ); - - // Zero weights (testing that zero is allowed as non-negative) + // Zero weights rrfRetrieverBuilder = new RRFRetrieverBuilder( null, List.of("field_1^0", "field_2^1.0"), @@ -305,15 +232,14 @@ public void testMultiFieldsParamsRewriteWithWeights() { queryRewriteContext, Map.of("field_1", 0.0f, "field_2", 1.0f), Map.of(), - "zero_test", - null + "zero_test" ); - // Mixed weighted and unweighted fields in simplified syntax + // Glob matching on inference and non-inference fields with per-field boosting rrfRetrieverBuilder = new RRFRetrieverBuilder( null, - List.of("title^2.5", "content", "tags^1.5", "description"), - "test query", + List.of("field_*^1.5", "*_field_1^2.5"), + "baz", DEFAULT_RANK_WINDOW_SIZE, RRFRetrieverBuilder.DEFAULT_RANK_CONSTANT, new float[0] @@ -321,17 +247,16 @@ public void testMultiFieldsParamsRewriteWithWeights() { assertMultiFieldsParamsRewrite( rrfRetrieverBuilder, queryRewriteContext, - Map.of("title", 2.5f, "content", 1.0f, "tags", 1.5f, "description", 1.0f), - Map.of(), - "test query", - null + Map.of("field_*", 1.5f, "*_field_1", 2.5f), + 
Map.of("semantic_field_1", 2.5f), + "baz" ); - // Decimal weight precision handling + // Multiple boosts defined on the same field rrfRetrieverBuilder = new RRFRetrieverBuilder( null, - List.of("field1^0.1", "field2^2.75", "field3^10.999"), - "test query", + List.of("field_*^1.5", "field_1^3.0", "*_field_1^2.5", "semantic_*^1.5"), + "baz2", DEFAULT_RANK_WINDOW_SIZE, RRFRetrieverBuilder.DEFAULT_RANK_CONSTANT, new float[0] @@ -339,27 +264,27 @@ public void testMultiFieldsParamsRewriteWithWeights() { assertMultiFieldsParamsRewrite( rrfRetrieverBuilder, queryRewriteContext, - Map.of("field1", 0.1f, "field2", 2.75f, "field3", 10.999f), - Map.of(), - "test query", - null + Map.of("field_*", 1.5f, "field_1", 3.0f, "*_field_1", 2.5f, "semantic_*", 1.5f), + Map.of("semantic_field_1", 3.75f, "semantic_field_2", 1.5f), + "baz2" ); - // Test negative weight validation - RRFRetrieverBuilder negativeWeightBuilder = new RRFRetrieverBuilder( + // All-fields wildcard + rrfRetrieverBuilder = new RRFRetrieverBuilder( null, - List.of("field_1^-1.0"), - "negative_test", + List.of("*"), + "qux", DEFAULT_RANK_WINDOW_SIZE, RRFRetrieverBuilder.DEFAULT_RANK_CONSTANT, new float[0] ); - - IllegalArgumentException iae = expectThrows( - IllegalArgumentException.class, - () -> negativeWeightBuilder.doRewrite(queryRewriteContext) + assertMultiFieldsParamsRewrite( + rrfRetrieverBuilder, + queryRewriteContext, + Map.of("*", 1.0f), + Map.of("semantic_field_1", 1.0f, "semantic_field_2", 1.0f), + "qux" ); - assertEquals("[rrf] per-field weights must be non-negative", iae.getMessage()); } public void testMultiIndexMultiFieldsParamsRewrite() { @@ -418,60 +343,108 @@ public void testMultiIndexMultiFieldsParamsRewrite() { null ); - // Glob matching on inference and non-inference fields + // Non-default rank window size and non-default rank_constant retriever = new RRFRetrieverBuilder( null, - List.of("field_*", "field_1", "*_field_1", "semantic_*"), - "baz2", - DEFAULT_RANK_WINDOW_SIZE, - 
RRFRetrieverBuilder.DEFAULT_RANK_CONSTANT, + List.of("field_1", "field_2", "semantic_field_1", "semantic_field_2"), + "foo2", + DEFAULT_RANK_WINDOW_SIZE * 2, + RRFRetrieverBuilder.DEFAULT_RANK_CONSTANT * 2, new float[0] ); assertMultiIndexMultiFieldsParamsRewrite( retriever, queryRewriteContext, - Map.of(Map.of("field_*", 1.0f, "field_1", 1.0f, "*_field_1", 1.0f, "semantic_*", 1.0f), List.of()), + Map.of( + Map.of("field_1", 1.0f, "field_2", 1.0f), + List.of(indexName), + Map.of("field_1", 1.0f, "field_2", 1.0f, "semantic_field_1", 1.0f), + List.of(anotherIndexName) + ), Map.of( new Tuple<>("semantic_field_1", List.of(indexName)), 1.0f, new Tuple<>("semantic_field_2", List.of(indexName)), 1.0f, new Tuple<>("semantic_field_2", List.of(anotherIndexName)), - 1.0f, - new Tuple<>("semantic_field_3", List.of(anotherIndexName)), 1.0f ), - "baz2", + "foo2", null ); - // Non-default rank window size and non-default rank_constant + // No wildcards, per-field boosting retriever = new RRFRetrieverBuilder( null, - List.of("field_1", "field_2", "semantic_field_1", "semantic_field_2"), - "foo2", - DEFAULT_RANK_WINDOW_SIZE * 2, - RRFRetrieverBuilder.DEFAULT_RANK_CONSTANT * 2, + List.of("field_1", "field_2^1.5", "semantic_field_1", "semantic_field_2^2"), + "bar", + DEFAULT_RANK_WINDOW_SIZE, + RRFRetrieverBuilder.DEFAULT_RANK_CONSTANT, new float[0] ); assertMultiIndexMultiFieldsParamsRewrite( retriever, queryRewriteContext, Map.of( - Map.of("field_1", 1.0f, "field_2", 1.0f), + Map.of("field_1", 1.0f, "field_2", 1.5f), List.of(indexName), - Map.of("field_1", 1.0f, "field_2", 1.0f, "semantic_field_1", 1.0f), + Map.of("field_1", 1.0f, "field_2", 1.5f, "semantic_field_1", 1.0f), List.of(anotherIndexName) ), Map.of( new Tuple<>("semantic_field_1", List.of(indexName)), 1.0f, new Tuple<>("semantic_field_2", List.of(indexName)), - 1.0f, + 2.0f, new Tuple<>("semantic_field_2", List.of(anotherIndexName)), - 1.0f + 2.0f ), - "foo2", + "bar", + null + ); + + // Glob matching on inference and 
non-inference fields with per-field boosting + retriever = new RRFRetrieverBuilder( + null, + List.of("field_*^1.5", "*_field_1^2.5"), + "baz", + DEFAULT_RANK_WINDOW_SIZE, + RRFRetrieverBuilder.DEFAULT_RANK_CONSTANT, + new float[0] + ); + assertMultiIndexMultiFieldsParamsRewrite( + retriever, + queryRewriteContext, + Map.of(Map.of("field_*", 1.5f, "*_field_1", 2.5f), List.of()), + Map.of(new Tuple<>("semantic_field_1", List.of(indexName)), 2.5f), + "baz", + null + ); + + // Multiple boosts defined on the same field + retriever = new RRFRetrieverBuilder( + null, + List.of("field_*^1.5", "field_1^3.0", "*_field_1^2.5", "semantic_*^1.5"), + "baz2", + DEFAULT_RANK_WINDOW_SIZE, + RRFRetrieverBuilder.DEFAULT_RANK_CONSTANT, + new float[0] + ); + assertMultiIndexMultiFieldsParamsRewrite( + retriever, + queryRewriteContext, + Map.of(Map.of("field_*", 1.5f, "field_1", 3.0f, "*_field_1", 2.5f, "semantic_*", 1.5f), List.of()), + Map.of( + new Tuple<>("semantic_field_1", List.of(indexName)), + 3.75f, + new Tuple<>("semantic_field_2", List.of(indexName)), + 1.5f, + new Tuple<>("semantic_field_2", List.of(anotherIndexName)), + 1.5f, + new Tuple<>("semantic_field_3", List.of(anotherIndexName)), + 1.5f + ), + "baz2", null ); @@ -552,13 +525,13 @@ public void testMultiIndexMultiFieldsParamsRewriteWithSameInferenceIds() { null ); - // Non-default rank window size + // Non-default rank window size and rank constant retriever = new RRFRetrieverBuilder( null, List.of("field_1", "field_2", "semantic_field_1", "semantic_field_2"), "foo2", DEFAULT_RANK_WINDOW_SIZE * 2, - RRFRetrieverBuilder.DEFAULT_RANK_CONSTANT, + RRFRetrieverBuilder.DEFAULT_RANK_CONSTANT * 2, new float[0] ); assertMultiIndexMultiFieldsParamsRewrite( @@ -575,10 +548,51 @@ public void testMultiIndexMultiFieldsParamsRewriteWithSameInferenceIds() { null ); - // Glob matching on inference and non-inference fields + // No wildcards, per-field boosting retriever = new RRFRetrieverBuilder( null, - List.of("field_*", "field_1", 
"*_field_1", "semantic_*"), + List.of("field_1", "field_2^1.5", "semantic_field_1", "semantic_field_2^2"), + "bar", + DEFAULT_RANK_WINDOW_SIZE, + RRFRetrieverBuilder.DEFAULT_RANK_CONSTANT, + new float[0] + ); + assertMultiIndexMultiFieldsParamsRewrite( + retriever, + queryRewriteContext, + Map.of( + Map.of("field_1", 1.0f, "field_2", 1.5f), + List.of(indexName), + Map.of("field_1", 1.0f, "field_2", 1.5f, "semantic_field_1", 1.0f), + List.of(anotherIndexName) + ), + Map.of(new Tuple<>("semantic_field_1", List.of(indexName)), 1.0f, new Tuple<>("semantic_field_2", List.of()), 2.0f), + "bar", + null + ); + + // Glob matching on inference and non-inference fields with per-field boosting + retriever = new RRFRetrieverBuilder( + null, + List.of("field_*^1.5", "*_field_1^2.5"), + "baz", + DEFAULT_RANK_WINDOW_SIZE, + RRFRetrieverBuilder.DEFAULT_RANK_CONSTANT, + new float[0] + ); + assertMultiIndexMultiFieldsParamsRewrite( + retriever, + queryRewriteContext, + Map.of(Map.of("field_*", 1.5f, "*_field_1", 2.5f), List.of()), // no index filter on the lexical query + Map.of(new Tuple<>("semantic_field_1", List.of(indexName)), 2.5f), + "baz", + null + ); + + // Multiple boosts defined on the same field + retriever = new RRFRetrieverBuilder( + null, + List.of("field_*^1.5", "field_1^3.0", "*_field_1^2.5", "semantic_*^1.5"), "baz2", DEFAULT_RANK_WINDOW_SIZE, RRFRetrieverBuilder.DEFAULT_RANK_CONSTANT, @@ -587,14 +601,14 @@ public void testMultiIndexMultiFieldsParamsRewriteWithSameInferenceIds() { assertMultiIndexMultiFieldsParamsRewrite( retriever, queryRewriteContext, - Map.of(Map.of("field_*", 1.0f, "field_1", 1.0f, "*_field_1", 1.0f, "semantic_*", 1.0f), List.of()), + Map.of(Map.of("field_*", 1.5f, "field_1", 3.0f, "*_field_1", 2.5f, "semantic_*", 1.5f), List.of()), Map.of( new Tuple<>("semantic_field_1", List.of(indexName)), - 1.0f, - new Tuple<>("semantic_field_2", List.of()), - 1.0f, + 3.75f, + new Tuple<>("semantic_field_2", List.of()), // no index filter since both indices 
have this field + 1.5f, new Tuple<>("semantic_field_3", List.of(anotherIndexName)), - 1.0f + 1.5f ), "baz2", null From a9b25c3a2808bed958141da42c8d4051f9ef8b9f Mon Sep 17 00:00:00 2001 From: Mike Pellegrini Date: Thu, 18 Sep 2025 17:46:42 -0400 Subject: [PATCH 2/2] Adjusted YAML tests --- .../test/rrf/310_rrf_retriever_simplified.yml | 78 +------------------ 1 file changed, 1 insertion(+), 77 deletions(-) diff --git a/x-pack/plugin/rank-rrf/src/yamlRestTest/resources/rest-api-spec/test/rrf/310_rrf_retriever_simplified.yml b/x-pack/plugin/rank-rrf/src/yamlRestTest/resources/rest-api-spec/test/rrf/310_rrf_retriever_simplified.yml index 4a42e5170ca44..5827803116a13 100644 --- a/x-pack/plugin/rank-rrf/src/yamlRestTest/resources/rest-api-spec/test/rrf/310_rrf_retriever_simplified.yml +++ b/x-pack/plugin/rank-rrf/src/yamlRestTest/resources/rest-api-spec/test/rrf/310_rrf_retriever_simplified.yml @@ -2,7 +2,7 @@ setup: - requires: cluster_features: [ "rrf_retriever.multi_fields_query_format_support" ] reason: "RRF retriever multi-fields query format support" - test_runner_features: [ "contains", "headers", "close_to" ] + test_runner_features: [ "contains" ] - do: inference.put: @@ -224,8 +224,6 @@ setup: reason: "Simplified weighted fields syntax support" - do: - headers: - Content-Type: application/json search: index: test-index body: @@ -238,13 +236,9 @@ setup: - match: { hits.total.value: 2 } - length: { hits.hits: 2 } - match: { hits.hits.0._id: "1" } - - gt: { hits.hits.0._score: 0.0 } - match: { hits.hits.1._id: "3" } - - gt: { hits.hits.1._score: 0.0 } - do: - headers: - Content-Type: application/json search: index: test-index body: @@ -257,9 +251,7 @@ setup: - match: { hits.total.value: 2 } - length: { hits.hits: 2 } - match: { hits.hits.0._id: "3" } - - gt: { hits.hits.0._score: 0.0 } - match: { hits.hits.1._id: "1" } - - gt: { hits.hits.1._score: 0.0 } --- "Semantic match per-field boosting using the simplified format": @@ -272,8 +264,6 @@ setup: # services 
generate consistent vectors (i.e. same input -> same output) to demonstrate that per-field boosting on # a semantic_text field can change the result order. - do: - headers: - Content-Type: application/json search: index: test-index body: @@ -282,19 +272,13 @@ setup: fields: [ "dense_inference", "sparse_inference" ] query: "distributed, RESTful, search engine" - # Semantic-only match, so max RRF score for rank 1 with default rank_constant (60) is 1/(60+1) = 0.01639 - match: { hits.total.value: 3 } - length: { hits.hits: 3 } - match: { hits.hits.0._id: "2" } - - close_to: { hits.hits.0._score: { value: 0.01639, error: 0.0001 } } - match: { hits.hits.1._id: "3" } - - lt: { hits.hits.1._score: 1.0 } - match: { hits.hits.2._id: "1" } - - lt: { hits.hits.2._score: 1.0 } - do: - headers: - Content-Type: application/json search: index: test-index body: @@ -303,15 +287,11 @@ setup: fields: [ "dense_inference^3", "sparse_inference" ] query: "distributed, RESTful, search engine" - # Semantic-only match with boosted dense_inference field, so max RRF score for rank 1 is still 1/(60+1) = 0.01639 - match: { hits.total.value: 3 } - length: { hits.hits: 3 } - match: { hits.hits.0._id: "3" } - - close_to: { hits.hits.0._score: { value: 0.01639, error: 0.0001 } } - match: { hits.hits.1._id: "2" } - - lt: { hits.hits.1._score: 1.0 } - match: { hits.hits.2._id: "1" } - - lt: { hits.hits.2._score: 1.0 } --- "Negative weight validation": @@ -331,43 +311,6 @@ setup: - match: { error.root_cause.0.reason: "[rrf] per-field weights must be non-negative" } ---- -"Zero weight handling": - - requires: - cluster_features: ["rrf_retriever.simplified_weighted_support"] - reason: "Simplified weighted fields syntax support" - - - do: - search: - index: test-index - body: - retriever: - rrf: - fields: ["text_1^0", "text_2^1"] - query: "foo" - - - gte: { hits.total.value: 1 } - ---- -"Basic per-field boosting using the simplified format": - - requires: - cluster_features: 
["rrf_retriever.simplified_weighted_support"] - reason: "Simplified weighted fields syntax support" - - - do: - search: - index: test-index - body: - retriever: - rrf: - fields: [ "text_1", "text_2^2" ] - query: "foo" - - # With weighted fields, verify basic functionality - - gte: { hits.total.value: 1 } - - length: { hits.hits: 1 } - # Verify that text_2^2 affects ranking (basic smoke test) - --- "Can query text fields": - do: @@ -620,25 +563,6 @@ setup: - match: { hits.hits.1._id: "1" } - match: { hits.hits.2._id: "3" } ---- -"Semantic field weighting": - - requires: - cluster_features: ["rrf_retriever.simplified_weighted_support"] - reason: "Simplified weighted fields syntax support" - - - do: - search: - index: test-index - body: - retriever: - rrf: - fields: ["dense_inference^2", "sparse_inference^1.5"] - query: "elasticsearch" - - - match: { hits.total.value: 3 } - - length: { hits.hits: 3 } - - --- "Queries multiple indices using default_field": - requires: